Diffstat (limited to 'contrib/llvm/lib/Target')
39 files changed, 732 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index c0e2235..2c887a9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -220,27 +220,27 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { default: return None; + case AArch64::LD1i64: + case AArch64::LD2i64: + DestRegIdx = 0; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + case AArch64::LD1i8: case AArch64::LD1i16: case AArch64::LD1i32: - case AArch64::LD1i64: case AArch64::LD2i8: case AArch64::LD2i16: case AArch64::LD2i32: - case AArch64::LD2i64: case AArch64::LD3i8: case AArch64::LD3i16: case AArch64::LD3i32: + case AArch64::LD3i64: case AArch64::LD4i8: case AArch64::LD4i16: case AArch64::LD4i32: - DestRegIdx = 0; - BaseRegIdx = 3; - OffsetIdx = -1; - IsPrePost = false; - break; - - case AArch64::LD3i64: case AArch64::LD4i64: DestRegIdx = -1; BaseRegIdx = 3; @@ -264,23 +264,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Rv4s: case AArch64::LD1Rv8h: case AArch64::LD1Rv16b: - case AArch64::LD1Twov1d: - case AArch64::LD1Twov2s: - case AArch64::LD1Twov4h: - case AArch64::LD1Twov8b: - case AArch64::LD2Twov2s: - case AArch64::LD2Twov4s: - case AArch64::LD2Twov8b: - case AArch64::LD2Rv1d: - case AArch64::LD2Rv2s: - case AArch64::LD2Rv4s: - case AArch64::LD2Rv8b: DestRegIdx = 0; BaseRegIdx = 1; OffsetIdx = -1; IsPrePost = false; break; + case AArch64::LD1Twov1d: + case AArch64::LD1Twov2s: + case AArch64::LD1Twov4h: + case AArch64::LD1Twov8b: case AArch64::LD1Twov2d: case AArch64::LD1Twov4s: case AArch64::LD1Twov8h: @@ -301,10 +294,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Fourv4s: case AArch64::LD1Fourv8h: case AArch64::LD1Fourv16b: + case AArch64::LD2Twov2s: + case AArch64::LD2Twov4s: + case AArch64::LD2Twov8b: case AArch64::LD2Twov2d: case AArch64::LD2Twov4h: case AArch64::LD2Twov8h: case AArch64::LD2Twov16b: + case AArch64::LD2Rv1d: + case AArch64::LD2Rv2s: + case AArch64::LD2Rv4s: + case AArch64::LD2Rv8b: case AArch64::LD2Rv2d: case AArch64::LD2Rv4h: case AArch64::LD2Rv8h: @@ -345,32 +345,32 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { IsPrePost = false; break; + case AArch64::LD1i64_POST: + case AArch64::LD2i64_POST: + DestRegIdx = 1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = true; + break; + case AArch64::LD1i8_POST: case AArch64::LD1i16_POST: case AArch64::LD1i32_POST: - case AArch64::LD1i64_POST: case AArch64::LD2i8_POST: case AArch64::LD2i16_POST: case AArch64::LD2i32_POST: - case AArch64::LD2i64_POST: case AArch64::LD3i8_POST: case AArch64::LD3i16_POST: case AArch64::LD3i32_POST: + case AArch64::LD3i64_POST: case AArch64::LD4i8_POST: case AArch64::LD4i16_POST: case AArch64::LD4i32_POST: - DestRegIdx = 1; - BaseRegIdx = 4; - OffsetIdx = 5; - IsPrePost = false; - break; - - case AArch64::LD3i64_POST: case AArch64::LD4i64_POST: DestRegIdx = -1; BaseRegIdx = 4; OffsetIdx = 5; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LD1Onev1d_POST: @@ -389,23 +389,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Rv4s_POST: case AArch64::LD1Rv8h_POST: case AArch64::LD1Rv16b_POST: - case AArch64::LD1Twov1d_POST: - case AArch64::LD1Twov2s_POST: - case AArch64::LD1Twov4h_POST: - case AArch64::LD1Twov8b_POST: - case AArch64::LD2Twov2s_POST: - case AArch64::LD2Twov4s_POST: - case 
AArch64::LD2Twov8b_POST: - case AArch64::LD2Rv1d_POST: - case AArch64::LD2Rv2s_POST: - case AArch64::LD2Rv4s_POST: - case AArch64::LD2Rv8b_POST: DestRegIdx = 1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; + case AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov8b_POST: case AArch64::LD1Twov2d_POST: case AArch64::LD1Twov4s_POST: case AArch64::LD1Twov8h_POST: @@ -426,10 +419,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Fourv4s_POST: case AArch64::LD1Fourv8h_POST: case AArch64::LD1Fourv16b_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: case AArch64::LD2Twov2d_POST: case AArch64::LD2Twov4h_POST: case AArch64::LD2Twov8h_POST: case AArch64::LD2Twov16b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: case AArch64::LD2Rv2d_POST: case AArch64::LD2Rv4h_POST: case AArch64::LD2Rv8h_POST: @@ -467,7 +467,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LDRBBroW: @@ -572,8 +572,12 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { IsPrePost = true; break; - case AArch64::LDPDi: + case AArch64::LDNPDi: + case AArch64::LDNPQi: + case AArch64::LDNPSi: case AArch64::LDPQi: + case AArch64::LDPDi: + case AArch64::LDPSi: DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; @@ -581,7 +585,6 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { break; case AArch64::LDPSWi: - case AArch64::LDPSi: case AArch64::LDPWi: case AArch64::LDPXi: DestRegIdx = 0; @@ -592,18 +595,18 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LDPQpost: case AArch64::LDPQpre: + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: DestRegIdx = -1; BaseRegIdx = 3; OffsetIdx = 4; IsPrePost = true; break; - case AArch64::LDPDpost: - case AArch64::LDPDpre: case AArch64::LDPSWpost: case AArch64::LDPSWpre: - case AArch64::LDPSpost: - case AArch64::LDPSpre: case AArch64::LDPWpost: case AArch64::LDPWpre: case AArch64::LDPXpost: @@ -687,9 +690,14 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { if (!TII->isStridedAccess(MI)) continue; - LoadInfo LdI = *getLoadInfo(MI); - unsigned OldTag = *getTag(TRI, MI, LdI); - auto &OldCollisions = TagMap[OldTag]; + Optional<LoadInfo> OptLdI = getLoadInfo(MI); + if (!OptLdI) + continue; + LoadInfo LdI = *OptLdI; + Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI); + if (!OptOldTag) + continue; + auto &OldCollisions = TagMap[*OptOldTag]; if (OldCollisions.size() <= 1) continue; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9d87988..9c57926 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9347,11 +9347,20 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) { return SDValue(); } - // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from - // undoing this transformation. - SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32 - ? 
DAG.getRegister(AArch64::WZR, MVT::i32) - : DAG.getRegister(AArch64::XZR, MVT::i64); + // Use a CopyFromReg WZR/XZR here to prevent + // DAGCombiner::MergeConsecutiveStores from undoing this transformation. + SDLoc DL(&St); + unsigned ZeroReg; + EVT ZeroVT; + if (VT.getVectorElementType().getSizeInBits() == 32) { + ZeroReg = AArch64::WZR; + ZeroVT = MVT::i32; + } else { + ZeroReg = AArch64::XZR; + ZeroVT = MVT::i64; + } + SDValue SplatVal = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 5049a39..5971997 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -441,8 +441,7 @@ def MSRpstateImm1 : MSRpstateImm0_1; def MSRpstateImm4 : MSRpstateImm0_15; // The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. -let hasSideEffects = 0 in +// TPIDR_EL0. def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index cd9e7fb..025397b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -218,12 +218,17 @@ void GCNHazardRecognizer::RecedeCycle() { int GCNHazardRecognizer::getWaitStatesSince( function_ref<bool(MachineInstr *)> IsHazard) { - int WaitStates = -1; + int WaitStates = 0; for (MachineInstr *MI : EmittedInstrs) { + if (MI) { + if (IsHazard(MI)) + return WaitStates; + + unsigned Opcode = MI->getOpcode(); + if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF) + continue; + } ++WaitStates; - if (!MI || !IsHazard(MI)) - continue; - return WaitStates; } return std::numeric_limits<int>::max(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 582153d..b24d342 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1276,6 +1276,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(MI->getOperand(0).getReg())); return; @@ -1896,6 +1897,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(ScratchReg) // Predicate. 
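For readers tracing the GCNHazardRecognizer change above: the rewritten loop reports a hazard at its distance in real wait states and no longer charges a cycle for meta instructions. The following is a simplified, self-contained sketch of that counting scheme, not the LLVM API; Instr and its IsMeta flag are stand-ins for MachineInstr and its DBG_VALUE/IMPLICIT_DEF opcodes.

    #include <functional>
    #include <limits>
    #include <vector>

    // Stand-in for MachineInstr; IsMeta marks DBG_VALUE/IMPLICIT_DEF-style
    // instructions that occupy no hardware cycle.
    struct Instr { bool IsMeta = false; };

    int waitStatesSince(const std::vector<Instr *> &Emitted,
                        const std::function<bool(Instr *)> &IsHazard) {
      int WaitStates = 0;
      for (Instr *I : Emitted) {
        if (I) {
          if (IsHazard(I))
            return WaitStates;  // hazard found after this many real slots
          if (I->IsMeta)
            continue;           // meta instructions consume no wait states
        }
        ++WaitStates;           // a real instruction, or an empty cycle slot
      }
      return std::numeric_limits<int>::max();  // no hazard in the window
    }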
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 051827a..a1a31e1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -251,7 +251,9 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { assert(!Val == !VReg && "Return value without a vreg"); - auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL)); + auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>(); + unsigned Opcode = ST.getReturnOpcode(); + auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL)); if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 46d8f0d..3767277 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1030,8 +1030,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (STI->isThumb()) MIB.add(predOps(ARMCC::AL)); } else if (RetOpcode == ARM::TCRETURNri) { + unsigned Opcode = + STI->isThumb() ? ARM::tTAILJMPr + : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); BuildMI(MBB, MBBI, dl, - TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) + TII.get(Opcode)) .addReg(JumpTarget.getReg(), RegState::Kill); } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index bf00ef6..5dc9373 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1332,6 +1332,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + assert(isThumb2 || Subtarget->hasV4TOps()); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(AddrReg)); @@ -2168,9 +2170,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } - unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(RetOpc)); + TII.get(Subtarget->getReturnOpcode())); AddOptionalDefs(MIB); for (unsigned R : RetRegs) MIB.addReg(R, RegState::Implicit); diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 16b54e8..00b788a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -479,7 +479,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); LastPush = MBBI++; } @@ -2397,9 +2397,8 @@ void ARMFrameLowering::adjustForSegmentedStacks( BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - // bx lr - Return from this function. - Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); + // Return from this function. + BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. 
// pop {SR0, SR1} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 7206083..c488cd3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -2425,7 +2425,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (BX GPR:$dst)>, Sched<[WriteBr]>, - Requires<[IsARM]>; + Requires<[IsARM, HasV4T]>; } // Secure Monitor Call is a system instruction. @@ -5589,6 +5589,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in + def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + // Large immediate handling. // 32-bit immediate using two piece mod_imms or movw + movt. diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 7a452d4..5d57b68 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1909,6 +1909,7 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { + assert(STI->hasV4TOps()); BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) .addReg(Use.getReg(), RegState::Kill) .add(predOps(ARMCC::AL)) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index e15b175..9d74953 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -729,6 +729,17 @@ public: /// True if fast-isel is used. bool useFastISel() const; + + /// Returns the correct return opcode for the current feature set. + /// Use BX if available to allow mixing thumb/arm code, but fall back + /// to plain mov pc,lr on ARMv4. 
+ unsigned getReturnOpcode() const { + if (isThumb()) + return ARM::tBX_RET; + if (hasV4TOps()) + return ARM::BX_RET; + return ARM::MOVPCLR; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index b8a8b1f..2ab7bfe 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -142,9 +142,9 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { if (isThumb) { if (ARMArchFeature.empty()) - ARMArchFeature = "+thumb-mode"; + ARMArchFeature = "+thumb-mode,+v4t"; else - ARMArchFeature += ",+thumb-mode"; + ARMArchFeature += ",+thumb-mode,+v4t"; } if (TT.isOSNaCl()) { diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 540e05a..d6f85ed 100644 --- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -583,8 +583,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); - OpLo = AVR::LDRdPtr; - OpHi = AVR::LDDRdPtrQ; + OpLo = AVR::LDRdPtrPi; + OpHi = AVR::LDRdPtr; TRI->splitReg(DstReg, DstLoReg, DstHiReg); // Use a temporary register if src and dst registers are the same. @@ -597,6 +597,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg, RegState::Define) .addReg(SrcReg); // Push low byte onto stack if necessary. @@ -606,8 +607,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { // Load high byte. auto MIBHI = buildMI(MBB, MBBI, OpHi) .addReg(CurDstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(1); + .addReg(SrcReg, getKillRegState(SrcIsKill)); if (TmpReg) { // Move the high byte into the final destination. @@ -699,7 +699,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(Imm <= 63 && "Offset is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Offset is out of range"); // Use a temporary register if src and dst registers are the same. if (DstReg == SrcReg) @@ -741,7 +743,50 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { template <> bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) { - llvm_unreachable("wide LPM is unimplemented"); + MachineInstr &MI = *MBBI; + unsigned OpLo, OpHi, DstLoReg, DstHiReg; + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register + unsigned SrcReg = MI.getOperand(1).getReg(); + bool SrcIsKill = MI.getOperand(1).isKill(); + OpLo = AVR::LPMRdZPi; + OpHi = AVR::LPMRdZ; + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); + + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + + // Load low byte. 
+ auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg); + + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + + // Load high byte. + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(CurDstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + + if (TmpReg) { + // Move the high byte into the final destination. + buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); + + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); + } + + MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MI.eraseFromParent(); + return true; } template <> @@ -1074,7 +1119,9 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) { OpHi = AVR::STDPtrQRr; TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); - assert(Imm <= 63 && "Offset is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Offset is out of range"); auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(DstReg) @@ -1104,7 +1151,9 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) { OpHi = AVR::INRdA; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(Imm <= 63 && "Address is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Address is out of range"); auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -1132,7 +1181,9 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) { OpHi = AVR::OUTARr; TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); - assert(Imm <= 63 && "Address is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Address is out of range"); // 16 bit I/O writes need the high byte first auto MIBHI = buildMI(MBB, MBBI, OpHi) diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp index 7d3faac..d8e8bc1 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1469,8 +1469,10 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, } const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB->getParent()->begin(); - ++I; + + MachineFunction::iterator I; + for (I = F->begin(); I != F->end() && &(*I) != BB; ++I); + if (I != F->end()) ++I; // Create loop block. 
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h index b44c62a..85f9552 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h @@ -75,6 +75,11 @@ public: MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { return MVT::i8; } + + MVT::SimpleValueType getCmpLibcallReturnType() const override { + return MVT::i8; + } + const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 744aa72..1a89a13 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -537,8 +537,7 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, llvm_unreachable("unexpected opcode!"); case AVR::JMPk: case AVR::CALLk: - assert(BrOffset >= 0 && "offset must be absolute address"); - return isUIntN(16, BrOffset); + return true; case AVR::RCALLk: case AVR::RJMPk: return isIntN(13, BrOffset); @@ -556,5 +555,20 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } } +unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const { + // This method inserts a *direct* branch (JMP), despite its name. + // LLVM calls this method to fixup unconditional branches; it never calls + // insertBranch or some hypothetical "insertDirectBranch". + // See lib/CodeGen/BranchRelaxation.cpp for details. + // We end up here when a jump is too long for an RJMP instruction. + auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); + + return getInstSizeInBytes(MI); +} + } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h index f42d34f..eee8a92 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -107,6 +107,12 @@ public: bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + + unsigned insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const override; private: const AVRRegisterInfo RI; }; diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td index 184e4d5..7d1bfc8 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1152,10 +1152,10 @@ isReMaterializable = 1 in // // Expands to: // ld Rd, P+ - // ld Rd+1, P+ + // ld Rd+1, P let Constraints = "@earlyclobber $reg" in def LDWRdPtr : Pseudo<(outs DREGS:$reg), - (ins PTRDISPREGS:$ptrreg), + (ins PTRREGS:$ptrreg), "ldw\t$reg, $ptrreg", [(set i16:$reg, (load i16:$ptrreg))]>, Requires<[HasSRAM]>; @@ -1164,7 +1164,7 @@ isReMaterializable = 1 in // Indirect loads (with postincrement or predecrement).
let mayLoad = 1, hasSideEffects = 0, -Constraints = "$ptrreg = $base_wb,@earlyclobber $reg,@earlyclobber $base_wb" in +Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in { def LDRdPtrPi : FSTLD<0, 0b01, @@ -1238,35 +1238,55 @@ isReMaterializable = 1 in Requires<[HasSRAM]>; } -class AtomicLoad<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr), "atomic_op", +class AtomicLoad<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs DRC:$rd), (ins PTRRC:$rr), "atomic_op", [(set DRC:$rd, (Op i16:$rr))]>; -class AtomicStore<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs), (ins PTRDISPREGS:$rd, DRC:$rr), "atomic_op", +class AtomicStore<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs), (ins PTRRC:$rd, DRC:$rr), "atomic_op", [(Op i16:$rd, DRC:$rr)]>; -class AtomicLoadOp<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr, DRC:$operand), +class AtomicLoadOp<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs DRC:$rd), (ins PTRRC:$rr, DRC:$operand), "atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>; -def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8>; -def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS>; - -def AtomicStore8 : AtomicStore<atomic_store_8, GPR8>; -def AtomicStore16 : AtomicStore<atomic_store_16, DREGS>; - -def AtomicLoadAdd8 : AtomicLoadOp<atomic_load_add_8, GPR8>; -def AtomicLoadAdd16 : AtomicLoadOp<atomic_load_add_16, DREGS>; -def AtomicLoadSub8 : AtomicLoadOp<atomic_load_sub_8, GPR8>; -def AtomicLoadSub16 : AtomicLoadOp<atomic_load_sub_16, DREGS>; -def AtomicLoadAnd8 : AtomicLoadOp<atomic_load_and_8, GPR8>; -def AtomicLoadAnd16 : AtomicLoadOp<atomic_load_and_16, DREGS>; -def AtomicLoadOr8 : AtomicLoadOp<atomic_load_or_8, GPR8>; -def AtomicLoadOr16 : AtomicLoadOp<atomic_load_or_16, DREGS>; -def AtomicLoadXor8 : AtomicLoadOp<atomic_load_xor_8, GPR8>; -def AtomicLoadXor16 : AtomicLoadOp<atomic_load_xor_16, DREGS>; +// FIXME: I think 16-bit atomic binary ops need to mark +// r0 as clobbered. + +// Atomic instructions +// =================== +// +// These are all expanded by AVRExpandPseudoInsts +// +// 8-bit operations can use any pointer register because +// they are expanded directly into an LD/ST instruction. +// +// 16-bit operations use 16-bit load/store postincrement instructions, +// which require PTRDISPREGS. + +def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>; +def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>; + +def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>; +def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>; + +class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>; +class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>; + +def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>; +def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>; +def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>; +def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>; +def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>; +def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>; +def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>; +def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>; +def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>; +def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>; def AtomicFence : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence imm, imm)]>; @@ -1397,6 +1417,7 @@ def STDWPtrQRr : Pseudo<(outs), // Load program memory operations. 
let canFoldAsLoad = 1, isReMaterializable = 1, +mayLoad = 1, hasSideEffects = 0 in { let Defs = [R0], @@ -1417,8 +1438,7 @@ hasSideEffects = 0 in Requires<[HasLPMX]>; // Load program memory, while postincrementing the Z register. - let mayLoad = 1, - Defs = [R31R30] in + let Defs = [R31R30] in { def LPMRdZPi : FLPMX<0, 1, diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 249dc55..7099b29 100644 --- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -203,7 +203,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the offset is too big we have to adjust and restore the frame pointer // to materialize a valid load/store with displacement. //:TODO: consider using only one adiw/sbiw chain for more than one frame index - if (Offset > 63) { + if (Offset > 62) { unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK; int AddOffset = Offset - 63 + 1; diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp index a9d61ff..e698b6e 100644 --- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -25,7 +25,7 @@ namespace llvm { -static const char *AVRDataLayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-n8"; +static const char *AVRDataLayout = "e-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"; /// Processes a CPU name. static StringRef getCPU(StringRef CPU) { diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp index a2d8c16..2b45d9a 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp @@ -13,6 +13,8 @@ #include "AVRTargetStreamer.h" +#include "llvm/MC/MCContext.h" + namespace llvm { AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} @@ -20,5 +22,23 @@ AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} AVRTargetAsmStreamer::AVRTargetAsmStreamer(MCStreamer &S) : AVRTargetStreamer(S) {} +void AVRTargetStreamer::finish() { + MCStreamer &OS = getStreamer(); + MCContext &Context = OS.getContext(); + + MCSymbol *DoCopyData = Context.getOrCreateSymbol("__do_copy_data"); + MCSymbol *DoClearBss = Context.getOrCreateSymbol("__do_clear_bss"); + + // FIXME: We can disable __do_copy_data if there are no static RAM variables. + + OS.emitRawComment(" Declaring this symbol tells the CRT that it should"); + OS.emitRawComment("copy all variables from program memory to RAM on startup"); + OS.EmitSymbolAttribute(DoCopyData, MCSA_Global); + + OS.emitRawComment(" Declaring this symbol tells the CRT that it should"); + OS.emitRawComment("clear the zeroed data section on startup"); + OS.EmitSymbolAttribute(DoClearBss, MCSA_Global); +} + } // end namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h index 99a5366..815088b 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h @@ -19,6 +19,8 @@ class MCStreamer; class AVRTargetStreamer : public MCTargetStreamer { public: explicit AVRTargetStreamer(MCStreamer &S); + + void finish() override; }; /// A target streamer for textual AVR assembly code. 
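A worked example of the displacement limits tightened in the AVR hunks above (the 63 -> 62 asserts and the Offset > 62 frame-index check): every word-sized pseudo expands into two byte accesses, at Imm and Imm + 1, and the 6-bit displacement field of LDD/STD (like the I/O address of IN/OUT) tops out at 63, so the pseudo itself can only accept up to 62. A minimal sketch under those assumptions, with a hypothetical helper name:

    #include <cassert>

    // A word access at displacement Imm expands to a low-byte access at Imm
    // and a high-byte access at Imm + 1; both must fit the 0..63 encoding.
    void checkWordDisplacement(int Imm) {
      assert(Imm >= 0 && Imm <= 62 && "high byte at Imm + 1 would not encode");
      int LoOffset = Imm;      // e.g. std Y+Imm, rLo
      int HiOffset = Imm + 1;  // e.g. std Y+Imm+1, rHi (still <= 63)
      (void)LoOffset;
      (void)HiOffset;
    }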
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 81b0aa7..5740b49 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -578,11 +578,15 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(LHS) .addReg(MI.getOperand(2).getReg()) .addMBB(Copy1MBB); - else + else { + int64_t imm32 = MI.getOperand(2).getImm(); + // sanity check before we build the J*_ri instruction. + assert (isInt<32>(imm32)); BuildMI(BB, DL, TII.get(NewCC)) .addReg(LHS) - .addImm(MI.getOperand(2).getImm()) + .addImm(imm32) .addMBB(Copy1MBB); + } // Copy0MBB: // %FalseValue = ... diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index f683578..59e92f8 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -464,7 +464,7 @@ let usesCustomInserter = 1 in { (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", [(set i64:$dst, - (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; + (BPFselectcc i64:$lhs, (i64 i64immSExt32:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index e12188e..a294004 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -304,6 +304,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -2511,6 +2514,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::SEQIMacro: return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; + case Mips::MFTC0: case Mips::MTTC0: + case Mips::MFTGPR: case Mips::MTTGPR: + case Mips::MFTLO: case Mips::MTTLO: + case Mips::MFTHI: case Mips::MTTHI: + case Mips::MFTACX: case Mips::MTTACX: + case Mips::MFTDSP: case Mips::MTTDSP: + case Mips::MFTC1: case Mips::MTTC1: + case Mips::MFTHC1: case Mips::MTTHC1: + case Mips::CFTC1: case Mips::CTTC1: + return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; } } @@ -4882,6 +4895,212 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } +// Map the DSP accumulator and control register to the corresponding gpr +// operand. Unlike the other aliases, the m(f|t)t(lo|hi|acx) instructions +// do not map the DSP registers contiguously to gpr registers. +static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) { + switch (Inst.getOpcode()) { + case Mips::MFTLO: + case Mips::MTTLO: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::ZERO; + case Mips::AC1: + return Mips::A0; + case Mips::AC2: + return Mips::T0; + case Mips::AC3: + return Mips::T4; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTHI: + case Mips::MTTHI: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::AT; + case Mips::AC1: + return Mips::A1; + case Mips::AC2: + return Mips::T1; + case Mips::AC3: + return Mips::T5; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTACX: + case Mips::MTTACX: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::V0; + case Mips::AC1: + return Mips::A2; + case Mips::AC2: + return Mips::T2; + case Mips::AC3: + return Mips::T6; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTDSP: + case Mips::MTTDSP: + return Mips::S0; + default: + llvm_unreachable("Unknown instruction for 'mttr' dsp alias!"); + } +} + +// Map the floating point register operand to the corresponding register +// operand. +static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) { + switch (Inst.getOperand(IsMFTC1 ? 1 : 0).getReg()) { + case Mips::F0: return Mips::ZERO; + case Mips::F1: return Mips::AT; + case Mips::F2: return Mips::V0; + case Mips::F3: return Mips::V1; + case Mips::F4: return Mips::A0; + case Mips::F5: return Mips::A1; + case Mips::F6: return Mips::A2; + case Mips::F7: return Mips::A3; + case Mips::F8: return Mips::T0; + case Mips::F9: return Mips::T1; + case Mips::F10: return Mips::T2; + case Mips::F11: return Mips::T3; + case Mips::F12: return Mips::T4; + case Mips::F13: return Mips::T5; + case Mips::F14: return Mips::T6; + case Mips::F15: return Mips::T7; + case Mips::F16: return Mips::S0; + case Mips::F17: return Mips::S1; + case Mips::F18: return Mips::S2; + case Mips::F19: return Mips::S3; + case Mips::F20: return Mips::S4; + case Mips::F21: return Mips::S5; + case Mips::F22: return Mips::S6; + case Mips::F23: return Mips::S7; + case Mips::F24: return Mips::T8; + case Mips::F25: return Mips::T9; + case Mips::F26: return Mips::K0; + case Mips::F27: return Mips::K1; + case Mips::F28: return Mips::GP; + case Mips::F29: return Mips::SP; + case Mips::F30: return Mips::FP; + case Mips::F31: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc1 alias!"); + } +} + +// Map the coprocessor operand to the corresponding gpr register operand. +static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) { + switch (Inst.getOperand(IsMFTC0 ?
1 : 0).getReg()) { + case Mips::COP00: return Mips::ZERO; + case Mips::COP01: return Mips::AT; + case Mips::COP02: return Mips::V0; + case Mips::COP03: return Mips::V1; + case Mips::COP04: return Mips::A0; + case Mips::COP05: return Mips::A1; + case Mips::COP06: return Mips::A2; + case Mips::COP07: return Mips::A3; + case Mips::COP08: return Mips::T0; + case Mips::COP09: return Mips::T1; + case Mips::COP010: return Mips::T2; + case Mips::COP011: return Mips::T3; + case Mips::COP012: return Mips::T4; + case Mips::COP013: return Mips::T5; + case Mips::COP014: return Mips::T6; + case Mips::COP015: return Mips::T7; + case Mips::COP016: return Mips::S0; + case Mips::COP017: return Mips::S1; + case Mips::COP018: return Mips::S2; + case Mips::COP019: return Mips::S3; + case Mips::COP020: return Mips::S4; + case Mips::COP021: return Mips::S5; + case Mips::COP022: return Mips::S6; + case Mips::COP023: return Mips::S7; + case Mips::COP024: return Mips::T8; + case Mips::COP025: return Mips::T9; + case Mips::COP026: return Mips::K0; + case Mips::COP027: return Mips::K1; + case Mips::COP028: return Mips::GP; + case Mips::COP029: return Mips::SP; + case Mips::COP030: return Mips::FP; + case Mips::COP031: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc0 alias!"); + } +} + +/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing +/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits. +bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned rd = 0; + unsigned u = 1; + unsigned sel = 0; + unsigned h = 0; + bool IsMFTR = false; + switch (Inst.getOpcode()) { + case Mips::MFTC0: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTC0: + u = 0; + rd = getRegisterForMxtrC0(Inst, IsMFTR); + sel = Inst.getOperand(2).getImm(); + break; + case Mips::MFTGPR: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTGPR: + rd = Inst.getOperand(IsMFTR ? 1 : 0).getReg(); + break; + case Mips::MFTLO: + case Mips::MFTHI: + case Mips::MFTACX: + case Mips::MFTDSP: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTLO: + case Mips::MTTHI: + case Mips::MTTACX: + case Mips::MTTDSP: + rd = getRegisterForMxtrDSP(Inst, IsMFTR); + sel = 1; + break; + case Mips::MFTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MFTC1: + IsMFTR = true; + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::MTTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::CFTC1: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::CTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 3; + break; + } + unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; + unsigned Op1 = + IsMFTR ? rd + : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() + : Inst.getOperand(0).getReg()); + + TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc, + STI); + return false; +} + unsigned MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { @@ -5793,14 +6012,21 @@ OperandMatchResultTy MipsAsmParser::parseInvNum(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const MCExpr *IdVal; - // If the first token is '$' we may have register operand. - if (Parser.getTok().is(AsmToken::Dollar)) - return MatchOperand_NoMatch; + // If the first token is '$' we may have register operand. 
We have to reject + // cases where it is not a register. Complicating the matter is that + // register names are not reserved across all ABIs. + // Peek past the dollar to see if it's a register name for this ABI. SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().is(AsmToken::Dollar)) { + return matchCPURegisterName(Parser.getLexer().peekTok().getString()) == -1 + ? MatchOperand_ParseFail + : MatchOperand_NoMatch; + } if (getParser().parseExpression(IdVal)) return MatchOperand_ParseFail; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal); - assert(MCE && "Unexpected MCExpr type."); + if (!MCE) + return MatchOperand_NoMatch; int64_t Val = MCE->getValue(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm( diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index aad6bf3..0bddba7 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -246,8 +246,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { break; case MEK_CALL_HI16: case MEK_CALL_LO16: - case MEK_DTPREL_HI: - case MEK_DTPREL_LO: case MEK_GOT: case MEK_GOT_CALL: case MEK_GOT_DISP: @@ -263,14 +261,16 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case MEK_NEG: case MEK_PCREL_HI16: case MEK_PCREL_LO16: - case MEK_TLSLDM: // If we do have nested target-specific expressions, they will be in // a consecutive chain. if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr())) E->fixELFSymbolsInTLSFixups(Asm); break; - case MEK_GOTTPREL: + case MEK_DTPREL_HI: + case MEK_DTPREL_LO: + case MEK_TLSLDM: case MEK_TLSGD: + case MEK_GOTTPREL: case MEK_TPREL_HI: case MEK_TPREL_LO: fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2907b77..7caeb08 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -193,6 +193,21 @@ void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } +void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td index f82f82f..20c1ab5 100644 --- a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -415,6 +415,13 @@ class BITREV_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"bitrev", int_mips_bitrev, class BPOSGE32_MM_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget_mm, NoItinerary>; +let DecoderNamespace = "MicroMipsDSP", Arch 
= "mmdsp", + AdditionalPredicates = [HasDSP, InMicroMips] in { + def LWDSP_MM : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, + LW_FM_MM<0x3f>; + def SWDSP_MM : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, + LW_FM_MM<0x3e>; +} // Instruction defs. // microMIPS DSP Rev 1 def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC; diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td index c238a65..2595333 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1284,6 +1284,12 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>; } +let DecoderNamespace = "MipsDSP", Arch = "dsp", + AdditionalPredicates = [HasDSP] in { + def LWDSP : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, LW_FM<0x23>; + def SWDSP : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, LW_FM<0x2b>; +} + // Pseudo CMP and PICK instructions. class PseudoCMP<Instruction RealInst> : PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>, diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index ef05166..27a8597 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); } +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - int64_t Offset = 0; + int64_t Size = 0; - // Iterate over fixed sized objects. + // Iterate over fixed sized objects which are incoming arguments. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - Offset = std::max(Offset, -MFI.getObjectOffset(I)); + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); - Offset = alignTo(Offset + Size, Size); + unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); + Size = alignTo(Size + RegSize, RegSize); } - unsigned MaxAlign = MFI.getMaxAlignment(); - - // Check that MaxAlign is not zero if there is a stack object that is not a - // callee-saved spill. - assert(!MFI.getObjectIndexEnd() || MaxAlign); - - // Iterate over other objects. - for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) - Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign); - - // Call frame. - if (MFI.adjustsStack() && hasReservedCallFrame(MF)) - Offset = alignTo(Offset + MFI.getMaxCallFrameSize(), - std::max(MaxAlign, getStackAlignment())); - - return alignTo(Offset, getStackAlignment()); + // Get the size of the rest of the frame objects and any possible reserved + // call frame, accounting for alignment. 
+ return Size + MFI.estimateStackSize(MF); } // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td index 64bee5b..edc0981 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td @@ -35,6 +35,8 @@ class FIELD5<bits<5> Val> { def FIELD5_1_DMT_EMT : FIELD5<0b00001>; def FIELD5_2_DMT_EMT : FIELD5<0b01111>; def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; +def FIELD5_MFTR : FIELD5<0b01000>; +def FIELD5_MTTR : FIELD5<0b01100>; class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { bits<32> Inst; @@ -50,6 +52,25 @@ class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { let Inst{2-0} = 0b001; } +class COP0_MFTTR_MT<FIELD5 Op> : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + bits<5> rd; + bits<1> u; + bits<1> h; + bits<3> sel; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = Op.Value; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; // rx - currently unsupported. + let Inst{5} = u; + let Inst{4} = h; + let Inst{3} = 0b0; + let Inst{2-0} = sel; +} + class SPECIAL3_MT_FORK : MipsMTInst { bits<32> Inst; diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td index ab6693f..72e626c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td @@ -6,6 +6,13 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file describes the MIPS MT ASE as defined by MD00378 1.12. +// +// TODO: Add support for the microMIPS encodings for the MT ASE and add the +// instruction mappings. 
+// +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // MIPS MT Instruction Encodings @@ -27,6 +34,10 @@ class FORK_ENC : SPECIAL3_MT_FORK; class YIELD_ENC : SPECIAL3_MT_YIELD; +class MFTR_ENC : COP0_MFTTR_MT<FIELD5_MFTR>; + +class MTTR_ENC : COP0_MFTTR_MT<FIELD5_MTTR>; + //===----------------------------------------------------------------------===// // MIPS MT Instruction Descriptions //===----------------------------------------------------------------------===// @@ -39,6 +50,22 @@ class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> { InstrItinClass Itinerary = Itin; } +class MFTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mftr\t$rd, $rt, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MFTR; +} + +class MTTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mttr\t$rt, $rd, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MTTR; +} + class FORK_DESC { dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); dag InOperandList = (ins GPR32Opnd:$rt); @@ -79,9 +106,74 @@ let hasSideEffects = 1, isNotDuplicable = 1, def FORK : FORK_ENC, FORK_DESC, ASE_MT; def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; + + def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT; + + def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT; } //===----------------------------------------------------------------------===// +// MIPS MT Pseudo Instructions - used to support mtfr & mttr aliases. +//===----------------------------------------------------------------------===// +def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt, + uimm3:$sel), + "mftc0 $rd, $rt, $sel">, ASE_MT; + +def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mftgpr $rd, $rt">, ASE_MT; + +def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftlo $rt, $ac">, ASE_MT; + +def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mfthi $rt, $ac">, ASE_MT; + +def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftacx $rt, $ac">, ASE_MT; + +def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins), + "mftdsp $rt">, ASE_MT; + +def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mftc1 $rt, $ft">, ASE_MT; + +def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mfthc1 $rt, $ft">, ASE_MT; + +def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft), + "cftc1 $rt, $ft">, ASE_MT; + + +def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mttc0 $rt, $rd, $sel">, ASE_MT; + +def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd), + "mttgpr $rd, $rt">, ASE_MT; + +def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttlo $rt, $ac">, ASE_MT; + +def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mtthi $rt, $ac">, ASE_MT; + +def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttacx $rt, $ac">, ASE_MT; + +def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt), + "mttdsp $rt">, ASE_MT; + +def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mttc1 $rt, $ft">, ASE_MT; + 
+def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mtthc1 $rt, $ft">, ASE_MT; + +def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt), + "cttc1 $rt, $ft">, ASE_MT; + +//===----------------------------------------------------------------------===// // MIPS MT Instruction Definitions //===----------------------------------------------------------------------===// @@ -95,4 +187,22 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; + + def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT; } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 102ebb2..735461c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -894,10 +894,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, } // Set scavenging frame index if necessary. - uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + - estimateStackSize(MF); + uint64_t MaxSPOffset = estimateStackSize(MF); - if (isInt<16>(MaxSPOffset)) + // MSA has a minimum offset of 10 bits signed. If there is a variable + // sized object on the stack, the estimation cannot account for it. + if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) && + !MF.getFrameInfo().hasVarSizedObjects()) return; const TargetRegisterClass &RC = diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index ee07479..d2c2169 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -226,6 +226,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::SWDSP; // Hi, Lo are normally caller save but they are callee save // for interrupt handling. 
@@ -302,6 +304,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::LWDSP; assert(Opc && "Register class not handled!"); diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td index c2947bb..8ec55ab 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td +++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td @@ -226,6 +226,7 @@ def II_MFC1 : InstrItinClass; def II_MFHC1 : InstrItinClass; def II_MFC2 : InstrItinClass; def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo +def II_MFTR : InstrItinClass; def II_MOD : InstrItinClass; def II_MODU : InstrItinClass; def II_MOVE : InstrItinClass; @@ -255,6 +256,7 @@ def II_MTC1 : InstrItinClass; def II_MTHC1 : InstrItinClass; def II_MTC2 : InstrItinClass; def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo +def II_MTTR : InstrItinClass; def II_MUL : InstrItinClass; def II_MUH : InstrItinClass; def II_MUHU : InstrItinClass; @@ -664,12 +666,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MFTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MTTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_CACHE , [InstrStage<1, [ALU]>]>, InstrItinData<II_PREF , [InstrStage<1, [ALU]>]>, InstrItinData<II_CACHEE , [InstrStage<1, [ALU]>]>, diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td index 89cda67..e4c52a4 100644 --- a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td +++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td @@ -268,9 +268,11 @@ def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>; // MIPS MT instructions // ==================== -def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>; +def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE, II_MFTR, + II_MTTR]>; def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>; + def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>; // MIPS32R6 and MIPS16e diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h index 7d9f99c..af24838 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -119,6 +119,9 @@ public: SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, + int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI); void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 957b46c40..607bc45 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ 
b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7026,6 +7026,18 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { return DAG.getTargetConstant(1, dl, VT); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + // Split the pieces. + SDValue Lower = + DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32)); + SDValue Upper = + DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); + // We have to manually lower both halves so getNode doesn't try to + // reassemble the build_vector. + Lower = LowerBUILD_VECTORvXi1(Lower, DAG); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); + } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); @@ -34733,6 +34745,11 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y)) return SDValue(); + // Bail out if we know that this is not really just an oversized integer. + if (peekThroughBitcasts(X).getValueType() == MVT::f128 || + peekThroughBitcasts(Y).getValueType() == MVT::f128) + return SDValue(); + // TODO: Use PXOR + PTEST for SSE4.1 or later? // TODO: Add support for AVX-512. EVT VT = SetCC->getValueType(0);
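To close, an illustration of the v64i1 BUILD_VECTOR split added above for 32-bit x86: each constant v32i1 half lowers to a 32-bit immediate mask, which fits a GPR on a 32-bit target, and concatenating the halves reproduces the full 64-bit mask. A simplified stand-alone sketch with an assumed helper name, not the actual X86 lowering code:

    #include <array>
    #include <cstdint>

    uint64_t lowerV64i1BuildVector(const std::array<bool, 64> &Ops) {
      uint32_t Lower = 0, Upper = 0;
      for (unsigned I = 0; I != 32; ++I) {
        Lower |= uint32_t(Ops[I]) << I;       // elements 0..31  -> low mask
        Upper |= uint32_t(Ops[32 + I]) << I;  // elements 32..63 -> high mask
      }
      // CONCAT_VECTORS of the two v32i1 halves amounts to placing the upper
      // mask in the high 32 bits of the result.
      return (uint64_t(Upper) << 32) | Lower;
    }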