47 files changed, 436 insertions, 190 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index d554fe5..9550a3a 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -46,6 +46,6 @@ FunctionPass *createThumb2SizeReductionPass(
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index c7ea18a..96b4742 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -410,13 +410,13 @@ def : ProcessorModel<"cortex-r4",   CortexA8Model,
 def : ProcessorModel<"cortex-r4f",  CortexA8Model,
                                     [ProcR4,
                                      FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
-                                     FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+                                     FeatureVFP3, FeatureD16]>;
 
 // FIXME: R5 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r5",   CortexA8Model,
                                     [ProcR5, HasV7Ops, FeatureDB,
                                      FeatureVFP3, FeatureDSPThumb2,
-                                     FeatureHasRAS, FeatureVFPOnlySP,
+                                     FeatureHasRAS,
                                      FeatureD16, FeatureRClass]>;
 
 // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 4530e41..738dded 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() {
     } else if (STI.hasVFP4())
       ATS.emitFPU(ARM::FK_NEON_VFPV4);
     else
-      ATS.emitFPU(ARM::FK_NEON);
+      ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
     // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
     if (STI.hasV8Ops())
       ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
@@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() {
                   ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
                   : ARM::FK_VFPV4);
     else if (STI.hasVFP3())
-      ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
+      ATS.emitFPU(STI.hasD16()
+                  // +d16
+                  ? (STI.isFPOnlySP()
+                     ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
+                     : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
+                  // -d16
+                  : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
     else if (STI.hasVFP2())
       ATS.emitFPU(ARM::FK_VFPV2);
   }
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f2b7a64..b1a11d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -367,14 +367,10 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
 
 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return 0;
+
   if (!isUncondBranchOpcode(I->getOpcode()) &&
       !isCondBranchOpcode(I->getOpcode()))
     return 0;
@@ -594,7 +590,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
   // all definitions of CPSR are dead
   return true;
 }
-} // namespace llvm
+}
 
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 6fc0edd..b4706e3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -493,6 +493,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                          unsigned FrameReg, int &Offset,
                          const ARMBaseInstrInfo &TII);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index 2edb96a..d687568 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -281,6 +281,6 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index cb4eeb5..f4ec8c6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -335,7 +335,7 @@ namespace {
     }
   };
   char ARMConstantIslands::ID = 0;
-} // namespace
+}
 
 /// verify - check BBOffsets, BBSizes, alignment of islands
 void ARMConstantIslands::verify() {
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index b429bed..36f63e2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -44,7 +44,7 @@ namespace ARMCP {
     GOTTPOFF,
     TPOFF
   };
-} // namespace ARMCP
+}
 
 /// ARMConstantPoolValue - ARM specific constantpool value. This is used to
 /// represent PC-relative displacement between the address of the load
@@ -254,6 +254,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 963b46c..4438f50 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,7 +69,7 @@ namespace {
                            MachineBasicBlock::iterator &MBBI);
   };
   char ARMExpandPseudo::ID = 0;
-} // namespace
+}
 
 /// TransferImpOps - Transfer implicit operands on the pseudo instruction to
 /// the instructions created from the expansion.
@@ -129,7 +129,7 @@ namespace {
       return PseudoOpc < TE.PseudoOpc;
     }
   };
-} // namespace
+}
 
 static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index cead18f..4175b4a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2898,7 +2898,7 @@ const struct FoldableLoadExtendsStruct {
   { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
   { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
 };
-} // namespace
+}
 
 /// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
index 5b4a44c..0c910ab 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
@@ -92,6 +92,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) {
   }
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 091086d..a52e497 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -221,7 +221,7 @@ struct StackAdjustingInsts {
     }
   }
 };
-} // namespace
+}
 
 /// Emit an instruction sequence that will align the address in
 /// register Reg by zero-ing out the lower bits.  For versions of the
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index 98313e6..d763d17 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -78,6 +78,6 @@ public:
                                 MachineBasicBlock::iterator MI) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 575a9d9..50afb19 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -279,7 +279,7 @@ private:
   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                         bool is64BitVector);
 };
-} // namespace
+}
 
 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 /// operand. If so Imm will receive the 32-bit value.
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 94a026b..4b2105b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -83,7 +83,7 @@ namespace {
       CallOrPrologue = PC;
     }
   };
-} // namespace
+}
 
 // The APCS parameter registers.
 static const MCPhysReg GPRArgRegs[] = {
@@ -11404,6 +11404,167 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
               Addr});
 }
 
+/// \brief Lower an interleaved load into a vldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
+///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
+///
+///      Into:
+///        %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
+///        %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
+///        %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
+bool ARMTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  VectorType *VecTy = Shuffles[0]->getType();
+  Type *EltTy = VecTy->getVectorElementType();
+
+  const DataLayout *DL = getDataLayout();
+  unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+  // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
+  // support i64/f64 element).
+  if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+    return false;
+
+  // A pointer vector can not be the return type of the ldN intrinsics. Need to
+  // load integer vectors first and then convert to pointer vectors.
+  if (EltTy->isPointerTy())
+    VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+                            VecTy->getVectorNumElements());
+
+  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
+                                            Intrinsic::arm_neon_vld3,
+                                            Intrinsic::arm_neon_vld4};
+
+  Function *VldnFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
+
+  IRBuilder<> Builder(LI);
+  SmallVector<Value *, 2> Ops;
+
+  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+  Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
+  Ops.push_back(Builder.getInt32(LI->getAlignment()));
+
+  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+
+  // Replace uses of each shufflevector with the corresponding vector loaded
+  // by ldN.
+  for (unsigned i = 0; i < Shuffles.size(); i++) {
+    ShuffleVectorInst *SV = Shuffles[i];
+    unsigned Index = Indices[i];
+
+    Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+
+    SV->replaceAllUsesWith(SubVec);
+  }
+
+  return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+                                   unsigned NumElts) {
+  SmallVector<Constant *, 16> Mask;
+  for (unsigned i = 0; i < NumElts; i++)
+    Mask.push_back(Builder.getInt32(Start + i));
+
+  return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a vstN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+///                                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+///        store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
+///
+///      Into:
+///        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+///        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+///        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+///        call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// vst3 instruction in CodeGen.
+bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
+                                              ShuffleVectorInst *SVI,
+                                              unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+
+  VectorType *VecTy = SVI->getType();
+  assert(VecTy->getVectorNumElements() % Factor == 0 &&
+         "Invalid interleaved store");
+
+  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+  Type *EltTy = VecTy->getVectorElementType();
+  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+  const DataLayout *DL = getDataLayout();
+  unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+  // Skip illegal sub vector types and vector types of i64/f64 element (vstN
+  // doesn't support i64/f64 element).
+  if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+    return false;
+
+  Value *Op0 = SVI->getOperand(0);
+  Value *Op1 = SVI->getOperand(1);
+  IRBuilder<> Builder(SI);
+
+  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+  // vectors to integer vectors.
+  if (EltTy->isPointerTy()) {
+    Type *IntTy = DL->getIntPtrType(EltTy);
+
+    // Convert to the corresponding integer vector.
+    Type *IntVecTy =
+        VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
+    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+    SubVecTy = VectorType::get(IntTy, NumSubElts);
+  }
+
+  static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+                                       Intrinsic::arm_neon_vst3,
+                                       Intrinsic::arm_neon_vst4};
+  Function *VstNFunc = Intrinsic::getDeclaration(
+      SI->getModule(), StoreInts[Factor - 2], SubVecTy);
+
+  SmallVector<Value *, 6> Ops;
+
+  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+
+  // Split the shufflevector operands into sub vectors for the new vstN call.
+  for (unsigned i = 0; i < Factor; i++)
+    Ops.push_back(Builder.CreateShuffleVector(
+        Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+  Ops.push_back(Builder.getInt32(SI->getAlignment()));
+  Builder.CreateCall(VstNFunc, Ops);
+  return true;
+}
+
 enum HABaseType {
   HA_UNKNOWN = 0,
   HA_FLOAT,
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 71a47a2..74396392 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -215,7 +215,7 @@ namespace llvm {
       VST3LN_UPD,
       VST4LN_UPD
     };
-  } // namespace ARMISD
+  }
 
   /// Define some predicates that are used for node matching.
   namespace ARM {
@@ -433,6 +433,15 @@ namespace llvm {
     Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                            bool IsStore, bool IsLoad) const override;
 
+    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+    bool lowerInterleavedLoad(LoadInst *LI,
+                              ArrayRef<ShuffleVectorInst *> Shuffles,
+                              ArrayRef<unsigned> Indices,
+                              unsigned Factor) const override;
+    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+                               unsigned Factor) const override;
+
     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicRMWExpansionKind
@@ -638,6 +647,6 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
-} // namespace llvm
+}
 
 #endif  // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 59e1535..84f95be 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -198,7 +198,7 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char ARMCGBR::ID = 0;
 FunctionPass*
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index 9e5700a..90f34ea 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -43,6 +43,6 @@ private:
                             Reloc::Model RM) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 50e2292..245c9e8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -142,7 +142,7 @@ namespace {
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
   };
   char ARMLoadStoreOpt::ID = 0;
-} // namespace
+}
 
 static bool definesCPSR(const MachineInstr *MI) {
   for (const auto &MO : MI->operands()) {
@@ -444,7 +444,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
       return;
     }
 
-    if (MBBI->killsRegister(Base))
+    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
       // Register got killed. Stop updating.
       return;
   }
@@ -743,6 +743,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
     }
   }
 
+  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+    MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
+    if (TransferOp.isUse() && TransferOp.getReg() == Base)
+      BaseKill = false;
+  }
+
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   SmallVector<unsigned, 8> ImpDefs;
   SmallVector<MachineOperand *, 8> UsesOfImpDefs;
@@ -1464,119 +1470,124 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = &*MBBI;
   unsigned Opcode = MI->getOpcode();
-  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
-    const MachineOperand &BaseOp = MI->getOperand(2);
-    unsigned BaseReg = BaseOp.getReg();
-    unsigned EvenReg = MI->getOperand(0).getReg();
-    unsigned OddReg  = MI->getOperand(1).getReg();
-    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
-    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
-    // ARM errata 602117: LDRD with base in list may result in incorrect base
-    // register when interrupted or faulted.
-    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
-    if (!Errata602117 &&
-        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
-      return false;
+  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
+    return false;
 
-    MachineBasicBlock::iterator NewBBI = MBBI;
-    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
-    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
-    bool EvenDeadKill = isLd ?
-      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
-    bool EvenUndef = MI->getOperand(0).isUndef();
-    bool OddDeadKill  = isLd ?
-      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
-    bool OddUndef = MI->getOperand(1).isUndef();
-    bool BaseKill = BaseOp.isKill();
-    bool BaseUndef = BaseOp.isUndef();
-    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
-    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
-    int OffImm = getMemoryOpOffset(MI);
-    unsigned PredReg = 0;
-    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
-
-    if (OddRegNum > EvenRegNum && OffImm == 0) {
-      // Ascending register numbers and no offset. It's safe to change it to a
-      // ldm or stm.
-      unsigned NewOpc = (isLd)
-        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
-        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
-      if (isLd) {
-        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
-          .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(Pred).addReg(PredReg)
-          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
-          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
-        ++NumLDRD2LDM;
-      } else {
-        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
-          .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(Pred).addReg(PredReg)
-          .addReg(EvenReg,
-                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
-          .addReg(OddReg,
-                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
-        ++NumSTRD2STM;
-      }
+  const MachineOperand &BaseOp = MI->getOperand(2);
+  unsigned BaseReg = BaseOp.getReg();
+  unsigned EvenReg = MI->getOperand(0).getReg();
+  unsigned OddReg  = MI->getOperand(1).getReg();
+  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+  unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
+
+  // ARM errata 602117: LDRD with base in list may result in incorrect base
+  // register when interrupted or faulted.
+  bool Errata602117 = EvenReg == BaseReg &&
+    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
+  // ARM LDRD/STRD needs consecutive registers.
+  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
+    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
+
+  if (!Errata602117 && !NonConsecutiveRegs)
+    return false;
+
+  MachineBasicBlock::iterator NewBBI = MBBI;
+  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+  bool EvenDeadKill = isLd ?
+    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+  bool EvenUndef = MI->getOperand(0).isUndef();
+  bool OddDeadKill  = isLd ?
+    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+  bool OddUndef = MI->getOperand(1).isUndef();
+  bool BaseKill = BaseOp.isKill();
+  bool BaseUndef = BaseOp.isUndef();
+  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+  int OffImm = getMemoryOpOffset(MI);
+  unsigned PredReg = 0;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  if (OddRegNum > EvenRegNum && OffImm == 0) {
+    // Ascending register numbers and no offset. It's safe to change it to a
+    // ldm or stm.
+    unsigned NewOpc = (isLd)
+      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+    if (isLd) {
+      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+        .addReg(BaseReg, getKillRegState(BaseKill))
+        .addImm(Pred).addReg(PredReg)
+        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+      ++NumLDRD2LDM;
+    } else {
+      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+        .addReg(BaseReg, getKillRegState(BaseKill))
+        .addImm(Pred).addReg(PredReg)
+        .addReg(EvenReg,
+                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+        .addReg(OddReg,
+                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
+      ++NumSTRD2STM;
+    }
+    NewBBI = std::prev(MBBI);
+  } else {
+    // Split into two instructions.
+    unsigned NewOpc = (isLd)
+      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+    // so adjust and use t2LDRi12 here for that.
+    unsigned NewOpc2 = (isLd)
+      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+    DebugLoc dl = MBBI->getDebugLoc();
+    // If this is a load and base register is killed, it may have been
+    // re-defed by the load, make sure the first load does not clobber it.
+    if (isLd &&
+        (BaseKill || OffKill) &&
+        (TRI->regsOverlap(EvenReg, BaseReg))) {
+      assert(!TRI->regsOverlap(OddReg, BaseReg));
+      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+                    OddReg, OddDeadKill, false,
+                    BaseReg, false, BaseUndef, false, OffUndef,
+                    Pred, PredReg, TII, isT2);
       NewBBI = std::prev(MBBI);
+      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+                    EvenReg, EvenDeadKill, false,
+                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+                    Pred, PredReg, TII, isT2);
     } else {
-      // Split into two instructions.
-      unsigned NewOpc = (isLd)
-        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
-        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
-      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
-      // so adjust and use t2LDRi12 here for that.
-      unsigned NewOpc2 = (isLd)
-        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
-        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
-      DebugLoc dl = MBBI->getDebugLoc();
-      // If this is a load and base register is killed, it may have been
-      // re-defed by the load, make sure the first load does not clobber it.
-      if (isLd &&
-          (BaseKill || OffKill) &&
-          (TRI->regsOverlap(EvenReg, BaseReg))) {
-        assert(!TRI->regsOverlap(OddReg, BaseReg));
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
-                      OddReg, OddDeadKill, false,
-                      BaseReg, false, BaseUndef, false, OffUndef,
-                      Pred, PredReg, TII, isT2);
-        NewBBI = std::prev(MBBI);
-        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
-                      EvenReg, EvenDeadKill, false,
-                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
-                      Pred, PredReg, TII, isT2);
-      } else {
-        if (OddReg == EvenReg && EvenDeadKill) {
-          // If the two source operands are the same, the kill marker is
-          // probably on the first one. e.g.
-          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
-          EvenDeadKill = false;
-          OddDeadKill = true;
-        }
-        // Never kill the base register in the first instruction.
-        if (EvenReg == BaseReg)
-          EvenDeadKill = false;
-        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
-                      EvenReg, EvenDeadKill, EvenUndef,
-                      BaseReg, false, BaseUndef, false, OffUndef,
-                      Pred, PredReg, TII, isT2);
-        NewBBI = std::prev(MBBI);
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
-                      OddReg, OddDeadKill, OddUndef,
-                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
-                      Pred, PredReg, TII, isT2);
+      if (OddReg == EvenReg && EvenDeadKill) {
+        // If the two source operands are the same, the kill marker is
+        // probably on the first one. e.g.
+        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+        EvenDeadKill = false;
+        OddDeadKill = true;
       }
-      if (isLd)
-        ++NumLDRD2LDR;
-      else
-        ++NumSTRD2STR;
+      // Never kill the base register in the first instruction.
+      if (EvenReg == BaseReg)
+        EvenDeadKill = false;
+      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+                    EvenReg, EvenDeadKill, EvenUndef,
+                    BaseReg, false, BaseUndef, false, OffUndef,
+                    Pred, PredReg, TII, isT2);
+      NewBBI = std::prev(MBBI);
+      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+                    OddReg, OddDeadKill, OddUndef,
+                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+                    Pred, PredReg, TII, isT2);
     }
-
-    MBB.erase(MI);
-    MBBI = NewBBI;
-    return true;
+    if (isLd)
+      ++NumLDRD2LDR;
+    else
+      ++NumSTRD2STR;
   }
-  return false;
+
+  MBB.erase(MI);
+  MBBI = NewBBI;
+  return true;
 }
 
 /// An optimization pass to turn multiple LDR / STR ops of the same base and
@@ -1859,7 +1870,7 @@ namespace {
     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
   };
   char ARMPreAllocLoadStoreOpt::ID = 0;
-} // namespace
+}
 
 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TD = Fn.getTarget().getDataLayout();
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8b12102..14dd9ef 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -229,6 +229,6 @@ public:
     return It;
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 1c8e1f8..30baf42 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -32,7 +32,7 @@ public:
   }
 };
 char ARMOptimizeBarriersPass::ID = 0;
-} // namespace
+}
 
 // Returns whether the instruction can safely move past a DMB instruction
 // The current implementation allows this iif MI does not have any possible
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 4563caa..1db190f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -70,6 +70,6 @@ public:
                                  RTLIB::Libcall LC) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index f00594f..9909a6a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -453,6 +453,6 @@ public:
   /// True if fast-isel is used.
   bool useFastISel() const;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif  // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 104a34f..6e81bd2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -332,6 +332,10 @@ void ARMPassConfig::addIRPasses() {
     }));
 
   TargetPassConfig::addIRPasses();
+
+  // Match interleaved memory accesses to ldN/stN intrinsics.
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createInterleavedAccessPass(TM));
 }
 
 bool ARMPassConfig::addPreISel() {
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 4e1b371..f4901fc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -478,3 +478,28 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   }
   return LT.first;
 }
+
+unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                                unsigned Factor,
+                                                ArrayRef<unsigned> Indices,
+                                                unsigned Alignment,
+                                                unsigned AddressSpace) {
+  assert(Factor >= 2 && "Invalid interleave factor");
+  assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+  // vldN/vstN doesn't support vector types of i64/f64 element.
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+
+  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+    unsigned NumElts = VecTy->getVectorNumElements();
+    Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+    unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+
+    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
+    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+      return Factor;
+  }
+
+  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                           Alignment, AddressSpace);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 9479d76..f2e5db6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -126,6 +126,11 @@ public:
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace);
 
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                      unsigned Factor,
+                                      ArrayRef<unsigned> Indices,
+                                      unsigned Alignment,
+                                      unsigned AddressSpace);
   /// @}
 };
 
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 35387d3..c2db746 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -28,6 +28,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
@@ -9887,22 +9888,13 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
   }
   Lex();
 
+  MCSymbol *Sym;
   const MCExpr *Value;
-  if (Parser.parseExpression(Value)) {
-    TokError("missing expression");
-    Parser.eatToEndOfStatement();
-    return false;
-  }
-
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    TokError("unexpected token");
-    Parser.eatToEndOfStatement();
-    return false;
-  }
-  Lex();
+  if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+                                               Parser, Sym, Value))
+    return true;
 
-  MCSymbol *Alias = getContext().getOrCreateSymbol(Name);
-  getTargetStreamer().emitThumbSet(Alias, Value);
+  getTargetStreamer().emitThumbSet(Sym, Value);
   return false;
 }
 
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f973a8d..097ec04 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -81,7 +81,7 @@ namespace {
     private:
       std::vector<unsigned char> ITStates;
   };
-} // namespace
+}
 
 namespace {
 /// ARM disassembler for all ARM platforms.
@@ -118,7 +118,7 @@ private:
   DecodeStatus AddThumbPredicate(MCInst&) const;
   void UpdateThumbVFPPredicate(MCInst&) const;
 };
-} // namespace
+}
 
 static bool Check(DecodeStatus &Out, DecodeStatus In) {
   switch (In) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index e28f6e0..a6206e3 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -29,6 +29,6 @@ public:
                                      Subtype);
   }
 };
-} // namespace
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 412feb8..68b12ed 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -23,6 +23,6 @@ public:
     return createARMELFObjectWriter(OS, OSABI, isLittle());
   }
 };
-} // namespace
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 1975bca..4289a73 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -114,7 +114,7 @@ namespace ARM_PROC {
     case ID: return "id";
     }
   }
-} // namespace ARM_PROC
+}
 
 namespace ARM_MB {
   // The Memory Barrier Option constants map directly to the 4-bit encoding of
@@ -459,6 +459,6 @@ namespace ARMII {
 
 } // end namespace ARMII
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 9fe27fb..804d353 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -40,7 +40,7 @@ namespace {
     bool needsRelocateWithSymbol(const MCSymbol &Sym,
                                  unsigned Type) const override;
   };
-} // namespace
+}
 
 ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
   : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index bbc0b37..4d12bfb 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -563,20 +563,13 @@ private:
   }
 
   void EmitMappingSymbol(StringRef Name) {
-    MCSymbol *Start = getContext().createTempSymbol();
-    EmitLabel(Start);
-
     auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
         Name + "." + Twine(MappingSymbolCounter++)));
+    EmitLabel(Symbol);
 
-    getAssembler().registerSymbol(*Symbol);
     Symbol->setType(ELF::STT_NOTYPE);
     Symbol->setBinding(ELF::STB_LOCAL);
     Symbol->setExternal(false);
-    AssignSection(Symbol, getCurrentSection().first);
-
-    const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
-    Symbol->setVariableValue(Value);
   }
 
   void EmitThumbFunc(MCSymbol *Func) override {
@@ -804,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV3_D16:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv3B,
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_D16_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
+  case ARM::FK_VFPV3XD:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    break;
+  case ARM::FK_VFPV3XD_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
@@ -849,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_NEON_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+                     ARMBuildAttrs::AllowNeon,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_NEON_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
@@ -1345,6 +1382,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
     return S;
   }
 
-} // namespace llvm
+}
 
 
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 23ef501..46ba571 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -104,7 +104,7 @@ enum Fixups {
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
-} // namespace ARM
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 0fb395e..fafe25a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -370,7 +370,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
   return new ARMMCInstrAnalysis(Info);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c6f2d13..fd30623 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -103,7 +103,7 @@ MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
 
 /// Construct ARM Mach-O relocation info.
 MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
 
 // Defines symbolic names for ARM registers.  This defines a mapping from
 // register name to register number.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 6ac778e..95d7ea7 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -56,7 +56,7 @@ public:
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
 };
-} // namespace
+}
 
 static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
                               unsigned &Log2Size) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 32481e2..173cc93 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -60,7 +60,7 @@ namespace {
         EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
     }
   };
-} // namespace
+}
 
 void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
   if (RegSave == 0u)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 34b552f..166c04b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -79,7 +79,7 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
 bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
   return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
 }
-} // namespace
+}
 
 namespace llvm {
 MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 6515a65..b993b1b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -35,7 +35,7 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
   getAssembler().setIsThumbFunc(Symbol);
 }
-} // namespace
+}
 
 MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
                                            MCAsmBackend &MAB,
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index ca98f69..ed2deea 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -71,7 +71,7 @@ namespace {
     bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
   };
   char MLxExpansion::ID = 0;
-} // namespace
+}
 
 void MLxExpansion::clearStack() {
   std::fill(LastMIs, LastMIs + 4, nullptr);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index e5e89fa..31d5732 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -47,6 +47,6 @@ public:
                                 MachineBasicBlock::iterator MI) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 31b4df2..f3f493d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -58,6 +58,6 @@ private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI,
                             Reloc::Model RM) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 7ce602d..68736bc 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -48,7 +48,7 @@ namespace {
     bool InsertITInstructions(MachineBasicBlock &MBB);
   };
   char Thumb2ITBlockPass::ID = 0;
-} // namespace
+}
 
 /// TrackDefUses - Tracking what registers are being defined and used by
 /// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index d186dfb..916ab06 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -73,6 +73,6 @@ private:
 ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
 
 
-} // namespace llvm
+}
 
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0dd1b4c..d9ab824 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -202,7 +202,7 @@ namespace {
     std::function<bool(const Function &)> PredicateFtor;
   };
   char Thumb2SizeReduce::ID = 0;
-} // namespace
+}
 
 Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
     : MachineFunctionPass(ID), PredicateFtor(Ftor) {
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index e55f88f..23aaff3 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -60,6 +60,6 @@ public:
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 };
-} // namespace llvm
+}
 
 #endif