56 files changed, 1480 insertions, 455 deletions
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 91a9903..112512f 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -79,7 +79,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
index 36d1575..b253fef 100644
--- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -1,36 +1,47 @@
 ; RUN: llc  %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=BASIC %s 
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=BASIC %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
 ; RUN:    -mattr=-neon,-vfp3,+vfp2 \
 ; RUN:    -arm-reserve-r9 -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=CORTEXA8 %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=CORTEXA8 %s
 
 
 ; This tests that the extpected ARM attributes are emitted.
 ;
-; BASIC:        .ARM.attributes
-; BASIC-NEXT:         0x70000003
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x0000003c
-; BASIC-NEXT:         0x00000022
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000001
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         '41210000 00616561 62690001 17000000 060a0741 08010902 14011501 17031801 1901'
+; BASIC:        Section {
+; BASIC:          Name: .ARM.attributes
+; BASIC-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; BASIC-NEXT:     Flags [ (0x0)
+; BASIC-NEXT:     ]
+; BASIC-NEXT:     Address: 0x0
+; BASIC-NEXT:     Offset: 0x3C
+; BASIC-NEXT:     Size: 34
+; BASIC-NEXT:     Link: 0
+; BASIC-NEXT:     Info: 0
+; BASIC-NEXT:     AddressAlignment: 1
+; BASIC-NEXT:     EntrySize: 0
+; BASIC-NEXT:     SectionData (
+; BASIC-NEXT:       0000: 41210000 00616561 62690001 17000000
+; BASIC-NEXT:       0010: 060A0741 08010902 14011501 17031801
+; BASIC-NEXT:       0020: 1901
+; BASIC-NEXT:     )
 
-; CORTEXA8:        .ARM.attributes
-; CORTEXA8-NEXT:         0x70000003
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x0000003c
-; CORTEXA8-NEXT:         0x0000002f
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000001
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901'
+; CORTEXA8:        Name: .ARM.attributes
+; CORTEXA8-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; CORTEXA8-NEXT:     Flags [ (0x0)
+; CORTEXA8-NEXT:     ]
+; CORTEXA8-NEXT:     Address: 0x0
+; CORTEXA8-NEXT:     Offset: 0x3C
+; CORTEXA8-NEXT:     Size: 47
+; CORTEXA8-NEXT:     Link: 0
+; CORTEXA8-NEXT:     Info: 0
+; CORTEXA8-NEXT:     AddressAlignment: 1
+; CORTEXA8-NEXT:     EntrySize: 0
+; CORTEXA8-NEXT:     SectionData (
+; CORTEXA8-NEXT:       0000: 412E0000 00616561 62690001 24000000
+; CORTEXA8-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
+; CORTEXA8-NEXT:       0020: 0109020A 02140115 01170318 011901
+; CORTEXA8-NEXT:     )
 
 define i32 @f(i64 %z) {
        ret i32 0
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
index 94a0541..9eecd04 100644
--- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -sr -sd | FileCheck  -check-prefix=OBJ %s
 
 target triple = "armv7-none-linux-gnueabi"
 
@@ -9,32 +9,17 @@ define arm_aapcs_vfpcc i32 @barf() nounwind {
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
   ret i32 %0
-; OBJ:         '.text'
-; OBJ-NEXT:    'sh_type'
-; OBJ-NEXT:    'sh_flags'
-; OBJ-NEXT:    'sh_addr'
-; OBJ-NEXT:    'sh_offset'
-; OBJ-NEXT:    'sh_size'
-; OBJ-NEXT:    'sh_link'
-; OBJ-NEXT:    'sh_info'
-; OBJ-NEXT:    'sh_addralign'
-; OBJ-NEXT:    'sh_entsize'
-; OBJ-NEXT:    '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8'
-
-; OBJ:            Relocation 0
-; OBJ-NEXT:       'r_offset', 0x00000004
-; OBJ-NEXT:       'r_sym', 0x000009
-; OBJ-NEXT:        'r_type', 0x2b
-
-; OBJ:          Relocation 1
-; OBJ-NEXT:       'r_offset', 0x00000008
-; OBJ-NEXT:       'r_sym'
-; OBJ-NEXT:        'r_type', 0x2c
-
-; OBJ:          # Relocation 2
-; OBJ-NEXT:       'r_offset', 0x0000000c
-; OBJ-NEXT:       'r_sym', 0x00000a
-; OBJ-NEXT:       'r_type', 0x1c
+; OBJ:        Section {
+; OBJ:          Name: .text
+; OBJ:          Relocations [
+; OBJ-NEXT:       0x4 R_ARM_MOVW_ABS_NC a
+; OBJ-NEXT:       0x8 R_ARM_MOVT_ABS
+; OBJ-NEXT:       0xC R_ARM_CALL foo
+; OBJ-NEXT:     ]
+; OBJ-NEXT:     SectionData (
+; OBJ-NEXT:       0000: 00482DE9 000000E3 000040E3 FEFFFFEB
+; OBJ-NEXT:       0010: 0088BDE8
+; OBJ-NEXT:     )
 
 }
 
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
index b8ed819..1351a26 100644
--- a/test/CodeGen/ARM/2010-12-08-tpsoft.ll
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -1,9 +1,9 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ELFASM %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=ELFOBJ %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=ELFOBJ %s
 
-;; Make sure that bl __aeabi_read_tp is materiazlied and fixed up correctly
+;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly
 ;; in the obj case. 
 
 @i = external thread_local global i32
@@ -24,19 +24,13 @@ bb:                                               ; preds = %entry
 ; ELFASM:       	bl	__aeabi_read_tp
 
 
-; ELFOBJ:   '.text'
-; ELFOBJ-NEXT:  'sh_type'
-; ELFOBJ-NEXT:  'sh_flags'
-; ELFOBJ-NEXT:  'sh_addr'
-; ELFOBJ-NEXT:  'sh_offset'
-; ELFOBJ-NEXT:  'sh_size'
-; ELFOBJ-NEXT:  'sh_link'
-; ELFOBJ-NEXT:  'sh_info'
-; ELFOBJ-NEXT:  'sh_addralign'
-; ELFOBJ-NEXT:  'sh_entsize'
-;;;               BL __aeabi_read_tp is ---+
-;;;                                        V
-; ELFOBJ-NEXT:  00482de9 3c009fe5 00109fe7 feffffeb
+; ELFOBJ:      Sections [
+; ELFOBJ:        Section {
+; ELFOBJ:          Name: .text
+; ELFOBJ:          SectionData (
+;;;                  BL __aeabi_read_tp is ---------+
+;;;                                                 V
+; ELFOBJ-NEXT:     0000: 00482DE9 3C009FE5 00109FE7 FEFFFFEB
 
 
 bb1:                                              ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index 1272a25..f13bc12 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -t | FileCheck  -check-prefix=OBJ %s
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ASM %s
 
@@ -15,17 +15,20 @@
 ; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
 
 
-
-; OBJ:          Section 4
-; OBJ-NEXT:     '.bss'
-
-; OBJ:          'array00'
-; OBJ-NEXT:     'st_value', 0x00000000
-; OBJ-NEXT:     'st_size', 0x00000050
-; OBJ-NEXT:     'st_bind', 0x0
-; OBJ-NEXT:     'st_type', 0x1
-; OBJ-NEXT:     'st_other', 0x00
-; OBJ-NEXT:     'st_shndx', 0x0004
+; OBJ:      Sections [
+; OBJ:        Section {
+; OBJ:          Index: 4
+; OBJ-NEXT:     Name: .bss
+
+; OBJ:      Symbols [
+; OBJ:        Symbol {
+; OBJ:          Name: array00
+; OBJ-NEXT:     Value: 0x0
+; OBJ-NEXT:     Size: 80
+; OBJ-NEXT:     Binding: Local
+; OBJ-NEXT:     Type: Object
+; OBJ-NEXT:     Other: 0
+; OBJ-NEXT:     Section: .bss
 
 define i32 @main(i32 %argc) nounwind {
   %1 = load i32* @sum, align 4
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 1d1b89a..98c0af3 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -79,7 +79,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 266609b8..7a7ca8e 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -74,7 +74,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 1b21f75..9334bf3 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -15,13 +15,13 @@ for.cond:                                         ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.cond
   %v.5 = select i1 undef, i32 undef, i32 0
-  %0 = load i8* undef, align 1, !tbaa !0
+  %0 = load i8* undef, align 1
   %conv88 = zext i8 %0 to i32
   %sub89 = sub nsw i32 0, %conv88
   %v.8 = select i1 undef, i32 undef, i32 %sub89
-  %1 = load i8* null, align 1, !tbaa !0
+  %1 = load i8* null, align 1
   %conv108 = zext i8 %1 to i32
-  %2 = load i8* undef, align 1, !tbaa !0
+  %2 = load i8* undef, align 1
   %conv110 = zext i8 %2 to i32
   %sub111 = sub nsw i32 %conv108, %conv110
   %cmp112 = icmp slt i32 %sub111, 0
@@ -44,6 +44,3 @@ if.end299:                                        ; preds = %for.body, %for.cond
   %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
   ret i32 %s.10
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
index 926daaf..0f1c452 100644
--- a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -18,7 +18,7 @@ bb3:                                              ; preds = %bb4, %bb2
   br i1 %tmp, label %bb4, label %bb67
 
 bb4:                                              ; preds = %bb3
-  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp5 = load <4 x i32>* undef, align 16
   %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
@@ -41,9 +41,9 @@ bb4:                                              ; preds = %bb3
   %tmp24 = trunc i128 %tmp23 to i64
   %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
   %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
-  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp27 = load float* undef, align 4
   %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
-  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp29 = load <4 x i32>* undef, align 16
   %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
@@ -52,10 +52,10 @@ bb4:                                              ; preds = %bb3
   %tmp35 = fmul <4 x float> %tmp34, undef
   %tmp36 = fmul <4 x float> %tmp35, undef
   %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp38 = load float* undef, align 4
   %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
   %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp41 = load float* undef, align 4
   %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
   %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
   %tmp44 = fmul <4 x float> %tmp33, %tmp43
@@ -64,10 +64,10 @@ bb4:                                              ; preds = %bb3
   %tmp47 = fmul <4 x float> %tmp46, %tmp36
   %tmp48 = fadd <4 x float> undef, %tmp47
   %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp50 = load float* undef, align 4
   %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
   %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
-  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp54 = load float* %tmp52, align 4
   %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
   %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
   %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
@@ -99,7 +99,3 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index f1c85f1..61623ec 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -7,7 +7,7 @@ target triple = "armv7-none-linux-eabi"
 ; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
 define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
 bb:
-  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp = load <2 x float>* undef, align 8
   %tmp2 = extractelement <2 x float> %tmp, i32 0
   %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
   %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
@@ -70,6 +70,3 @@ entry:
 declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
index 5f24e42..a9e2ebb 100644
--- a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -56,9 +56,9 @@ bb3:                                              ; preds = %bb2
   %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
   %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
   %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp42 = load <4 x float>* null, align 16
   %tmp43 = fmul <4 x float> %tmp42, %tmp41
-  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp44 = load <4 x float>* undef, align 16
   %tmp45 = fadd <4 x float> undef, %tmp43
   %tmp46 = fadd <4 x float> undef, %tmp45
   %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
@@ -108,7 +108,7 @@ bb3:                                              ; preds = %bb2
   %tmp89 = fmul <4 x float> undef, %tmp88
   %tmp90 = fadd <4 x float> %tmp89, undef
   %tmp91 = fadd <4 x float> undef, %tmp90
-  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %tmp91, <4 x float>* undef, align 16
   unreachable
 
 bb92:                                             ; preds = %bb2
@@ -116,6 +116,3 @@ bb92:                                             ; preds = %bb2
 }
 
 declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
index 33ad187..0843fdc 100644
--- a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -9,16 +9,13 @@ define arm_aapcs_vfpcc void @foo() nounwind align 2 {
 ; <label>:1                                       ; preds = %0
   %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
   %3 = bitcast <2 x i64> %2 to <4 x float>
-  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
-  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
-  store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16
+  store <4 x float> %3, <4 x float>* undef, align 16
   %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
-  store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %4, <4 x float>* undef, align 16
   unreachable
 
 ; <label>:5                                       ; preds = %0
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
index 6f50f27..089dc91 100644
--- a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -20,12 +20,9 @@ bb5:                                              ; preds = %bb4
   %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
   %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
   %tmp17 = fadd <4 x float> %tmp16, %arg
-  store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+  store <4 x float> %tmp17, <4 x float>* undef, align 8
   br label %bb18
 
 bb18:                                             ; preds = %bb5, %bb4
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index ca0964a..a288015 100644
--- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -26,18 +26,14 @@
 ; CHECK: Successors:
 define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
-  store volatile i32 65540, i32* %p1, align 4, !tbaa !0
-  %0 = load volatile i32* %p2, align 4, !tbaa !0
+  store volatile i32 65540, i32* %p1, align 4
+  %0 = load volatile i32* %p2, align 4
   ret i32 %0
 }
 
 define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
-  store i32 65540, i32* %p1, align 4, !tbaa !0
-  %0 = load i32* %p2, align 4, !tbaa !0
+  store i32 65540, i32* %p1, align 4
+  %0 = load i32* %p2, align 4
   ret i32 %0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
index e4ad45b..adb5c7e 100644
--- a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -129,7 +129,7 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
   %45 = fmul <4 x float> undef, undef
   %46 = fmul <4 x float> %45, %43
   %47 = fmul <4 x float> undef, %44
-  %48 = load <4 x float>* undef, align 8, !tbaa !1
+  %48 = load <4 x float>* undef, align 8
   %49 = bitcast <4 x float> %48 to <2 x i64>
   %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
   %51 = bitcast <1 x i64> %50 to <2 x float>
@@ -145,10 +145,10 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
   %61 = fmul <4 x float> %59, %60
   %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01>
   %63 = fadd <4 x float> %47, %62
-  store <4 x float> %46, <4 x float>* undef, align 8, !tbaa !1
+  store <4 x float> %46, <4 x float>* undef, align 8
   call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
   call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
-  store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1
+  store <4 x float> %63, <4 x float>* undef, align 8
   unreachable
 
 ; <label>:64                                      ; preds = %41, %40
@@ -170,5 +170,3 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
 declare arm_aapcs_vfpcc void @bar(%0*, float)
 
 !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
index 38b9e0e..05abded 100644
--- a/test/CodeGen/ARM/2013-01-21-PR14992.ll
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -6,11 +6,11 @@
 ;CHECK: foo:
 define i32 @foo(i32* %a) nounwind optsize {
 entry:
-  %0 = load i32* %a, align 4, !tbaa !0
+  %0 = load i32* %a, align 4
   %arrayidx1 = getelementptr inbounds i32* %a, i32 1
-  %1 = load i32* %arrayidx1, align 4, !tbaa !0
+  %1 = load i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32* %a, i32 2
-  %2 = load i32* %arrayidx2, align 4, !tbaa !0
+  %2 = load i32* %arrayidx2, align 4
   %add.ptr = getelementptr inbounds i32* %a, i32 3
 ;Make sure we do not have a duplicated register in the front of the reg list
 ;EXPECTED:  ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
@@ -22,7 +22,3 @@ entry:
 }
 
 declare void @bar(i32*) optsize
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
new file mode 100644
index 0000000..4a5ca9d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -0,0 +1,73 @@
+;PR15293: ARM codegen ice - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+;CHECK: foo:
+;CHECK: 	sub	sp, sp, #8
+;CHECK: 	push	{r11, lr}
+;CHECK: 	str	r0, [sp, #12]
+;CHECK: 	add	r0, sp, #12
+;CHECK: 	bl	fooUseParam
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	add	sp, sp, #8
+;CHECK: 	mov	pc, lr
+
+;CHECK: foo2:
+;CHECK: 	sub	sp, sp, #16
+;CHECK: 	push	{r11, lr}
+;CHECK: 	str	r0, [sp, #12]
+;CHECK: 	add	r0, sp, #12
+;CHECK: 	str	r2, [sp, #16]
+;CHECK: 	bl	fooUseParam
+;CHECK: 	add	r0, sp, #16
+;CHECK: 	bl	fooUseParam
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	add	sp, sp, #16
+;CHECK: 	mov	pc, lr
+
+;CHECK: doFoo:
+;CHECK: 	push	{r11, lr}
+;CHECK: 	ldr	r0,
+;CHECK: 	ldr	r0, [r0]
+;CHECK: 	bl	foo
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	mov	pc, lr
+
+
+;CHECK: doFoo2:
+;CHECK: 	push	{r11, lr}
+;CHECK: 	ldr	r0,
+;CHECK: 	mov	r1, #0
+;CHECK: 	ldr	r0, [r0]
+;CHECK: 	mov	r2, r0
+;CHECK: 	bl	foo2
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	mov	pc, lr
+
+
+%artz = type { i32 }
+@static_val = constant %artz { i32 777 }
+
+declare void @fooUseParam(%artz* )
+
+define void @foo(%artz* byval %s) { ; callee taking one byval %artz (a struct with a single i32 field)
+  call void @fooUseParam(%artz* %s) ; hand the byval argument's address to the external user
+  ret void
+}
+
+define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2) { ; callee taking two byval %artz structs separated by a plain i32
+  call void @fooUseParam(%artz* %s) ; use the first byval argument
+  call void @fooUseParam(%artz* %s2) ; use the second byval argument
+  ret void
+}
+
+
+define void @doFoo() { ; caller side for @foo
+  call void @foo(%artz* byval @static_val) ; pass the constant @static_val by value
+  ret void
+}
+
+define void @doFoo2() { ; caller side for @foo2
+  call void @foo2(%artz* byval @static_val, i32 0, %artz* byval @static_val) ; same constant passed by value twice, with i32 0 in between
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
new file mode 100644
index 0000000..38d515f
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -0,0 +1,95 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.4 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP, and only then start to use GPRs.
+;
+;Co-Processor Register Candidates (CPRCs) are passed either in VFP registers or
+;on the stack, so once all VFP registers are allocated the stack is used. That
+;lets us use the stack without allocating any GPR, e.g. by passing 9 f64 params.
+;The first eight params go to d0-d7 and the ninth one goes to the stack.
+;Now, as the 10th parameter, we pass an i32, and it must go to R0.
+;
+;5.5 Parameter Passing, Stage C:
+;
+;C.2.cp If the argument is a CPRC then any co-processor registers in that class
+;that are unallocated are marked as unavailable. The NSAA is adjusted upwards
+;until it is correctly aligned for the argument and the argument is copied to
+;the memory at the adjusted NSAA. The NSAA is further incremented by the size
+;of the argument. The argument has now been allocated.
+;...
+;C.4 If the size in words of the argument is not more than r4 minus NCRN, the
+;argument is copied into core registers, starting at the NCRN. The NCRN is
+;incremented by the number of registers used. Successive registers hold the
+;parts of the argument they would hold if its value were loaded into those
+;registers from memory using an LDM instruction. The argument has now been
+;allocated.
+;
+;What is actually checked here:
+;Here we check that i32 param goes to r0.
+;
+;Current test-case was produced with command:
+;arm-linux-gnueabihf-clang -mcpu=cortex-a9 params-to-GPR.c -S -O1 -emit-llvm
+;
+;// params-to-GPR.c:
+;
+;void fooUseI32(unsigned);
+;
+;void foo(long double p0,
+;         long double p1,
+;         long double p2,
+;         long double p3,
+;         long double p4,
+;         long double p5,
+;         long double p6,
+;         long double p7,
+;         long double p8,
+;         unsigned p9) {
+;  fooUseI32(p9);
+;}
+;
+;void doFoo() {
+;  foo( 1,2,3,4,5,6,7,8,9, 43 );
+;}
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;
+;CHECK:     foo:
+;CHECK-NOT:     mov r0
+;CHECK-NOT:     ldr r0
+;CHECK:         bl fooUseI32
+;CHECK:     doFoo:
+;CHECK:         movs    r0, #43
+;CHECK:         bl      foo
+
+define void @foo(double %p0, ; --> D0
+                 double %p1, ; --> D1
+		 double %p2, ; --> D2
+		 double %p3, ; --> D3
+		 double %p4, ; --> D4
+		 double %p5, ; --> D5
+		 double %p6, ; --> D6
+		 double %p7, ; --> D7
+		 double %p8, ; --> Stack
+		 i32 %p9) #0 { ; --> R0, not Stack+8
+entry:
+  tail call void @fooUseI32(i32 %p9) ; forward %p9 as-is; the test expects it to already be in r0
+  ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() { ; caller side: nine double arguments followed by one i32
+entry:
+  tail call void @foo(double 23.0, ; --> D0
+                      double 23.1, ; --> D1
+		      double 23.2, ; --> D2
+                      double 23.3, ; --> D3
+                      double 23.4, ; --> D4
+                      double 23.5, ; --> D5
+                      double 23.6, ; --> D6
+                      double 23.7, ; --> D7
+                      double 23.8, ; --> Stack
+                      i32 43)      ; --> R0, not Stack+8
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
new file mode 100644
index 0000000..446403d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
@@ -0,0 +1,61 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.5 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP first, and only then start to use GPRs; after
+;              that, pass a byval parameter and check it goes to stack only.
+;
+;Co-Processor register candidates may be either in VFP or in stack, so after
+;all VFP are allocated, stack is used. We can use stack without GPR allocation
+;in that case, passing 9 f64 params, for example.
+;First eight params go to d0-d7, ninth one goes to the stack.
+;Now, as 10th parameter, we pass i32, and it must go to R0.
+;
+;For more information,
+;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5
+;
+;
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%struct_t = type { i32, i32, i32, i32 }
+@static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 }
+declare void @fooUseStruct(%struct_t*)
+
+define void @foo2(double %p0, ; --> D0
+                  double %p1, ; --> D1
+		  double %p2, ; --> D2
+		  double %p3, ; --> D3
+		  double %p4, ; --> D4
+		  double %p5, ; --> D5
+		  double %p6, ; --> D6
+		  double %p7, ; --> D7
+		  double %p8, ; --> Stack
+		  i32 %p9,    ; --> R0
+                  %struct_t* byval %p10) ; --> Stack+8
+{
+entry:
+;CHECK:     push.w {r11, lr}
+;CHECK-NOT: stm
+;CHECK:     add r0, sp, #16
+;CHECK:     bl fooUseStruct
+  call void @fooUseStruct(%struct_t* %p10)
+
+  ret void
+}
+
+define void @doFoo2() {
+entry:
+;CHECK-NOT: ldm
+  tail call void @foo2(double 23.0, ; --> D0
+                       double 23.1, ; --> D1
+		       double 23.2, ; --> D2
+                       double 23.3, ; --> D3
+                       double 23.4, ; --> D4
+                       double 23.5, ; --> D5
+                       double 23.6, ; --> D6
+                       double 23.7, ; --> D7
+                       double 23.8, ; --> Stack
+                       i32 43,      ; --> R0, not Stack+8
+                       %struct_t* byval @static_val) ; --> Stack+8, not R1     
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
index 2561686..4599928 100644
--- a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
+++ b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
@@ -1,18 +1,17 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s
-; The test is presented by Jiangning Liu.
-;CHECK-NOT: vldmia
+; PR14824. The test is presented by Jiangning Liu. If the ld/st optimization algorithm is changed, this test case may fail.
+; Also if the machine code for the ld/st optimizer is changed, this test case may fail. If so, remove this test.
 
 define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind {
+; CHECK: sample_test
+; CHECK-NOT: vldmia
+; CHECK: add
 entry:
+
+; Load %source
   %s0 = load <8 x i64> * %source, align 64
-  %s1 = load <8 x i64> * %secondSource, align 64
-  %s2 = bitcast <8 x i64> %s0 to i512
-  %data.i.i.48.extract.shift = lshr i512 %s2, 384
-  %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64
   %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6
   %s120 = load <8 x i64> * %arrayidx64, align 64
-  %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
-  %s121 = load <8 x i64> * %arrayidx67, align 64
   %s122 = bitcast <8 x i64> %s120 to i512
   %data.i.i677.48.extract.shift = lshr i512 %s122, 384
   %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
@@ -32,6 +31,11 @@ entry:
   %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5
   %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6
   %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
+
+; Load %secondSource
+  %s1 = load <8 x i64> * %secondSource, align 64
+  %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
+  %s121 = load <8 x i64> * %arrayidx67, align 64
   %s131 = bitcast <8 x i64> %s121 to i512
   %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
   %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
@@ -51,34 +55,16 @@ entry:
   %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5
   %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6
   %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7
+
+; Operations on %source and %secondSource
   %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
   %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
   %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
   %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
   store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
-  %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7
-  %s140 = load <8 x i64> * %arrayidx75, align 64
   %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
   %s141 = load <8 x i64> * %arrayidx78, align 64
-  %s142 = bitcast <8 x i64> %s140 to i512
-  %data.i.i650.32.extract.shift = lshr i512 %s142, 256
-  %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64
-  %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0
-  %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1
-  %data.i.i650.16.extract.shift = lshr i512 %s142, 128
-  %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64
-  %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2
-  %data.i.i650.8.extract.shift = lshr i512 %s142, 64
-  %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64
-  %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3
-  %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4
-  %data.i.i650.48.extract.shift = lshr i512 %s142, 384
-  %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64
-  %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5
-  %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6
-  %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64
-  %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7
   %s151 = bitcast <8 x i64> %s141 to i512
   %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
   %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
@@ -90,21 +76,7 @@ entry:
   %data.i1.i649.8.extract.shift = lshr i512 %s151, 64
   %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
   %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
-  %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4
-  %data.i1.i649.48.extract.shift = lshr i512 %s151, 384
-  %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64
-  %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5
-  %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6
-  %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64
-  %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7
-  %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
-  %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
-  %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
-  %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
-  store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64
+  store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64
   ret void
 }
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
new file mode 100644
index 0000000..de5fd31
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -0,0 +1,28 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.1.cp statement for VA functions.
+;Note: There are no VFP CPRCs in a variadic procedure.
+;Check that after %C was sent to stack, we set Next Core Register Number to R4.
+
+;This test is simplified IR version of
+;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+@.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1
+
+;CHECK: printfn:
+define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
+entry:
+  %conv = sext i16 %b to i32
+  %conv1 = sext i8 %E to i32
+  %call = tail call i32 (i8*, ...)* @printf(
+	i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0
+        i32 %a,                                          ; --> R1
+        i32 %conv,                                       ; --> R2
+        double %C,                                       ; --> SP, NCRN := R4
+;CHECK:    str r2, [sp, #8]                                                                     
+        i32 %conv1)                                      ; --> SP+8
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
new file mode 100644
index 0000000..6db71fe
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -0,0 +1,48 @@
+;Check AAPCS, 5.5 Parameter Passing, C.4 and C.5 rules.
+;Check case when NSAA != SP, and NCRN < R4, NCRN+ParamSize < R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888}
+
+declare void @fooUseStruct(%st_t*)
+
+define void @foo(double %vfp0,     ; --> D0,     NSAA=SP
+                 double %vfp1,     ; --> D1,     NSAA=SP
+		 double %vfp2,     ; --> D2,     NSAA=SP
+		 double %vfp3,     ; --> D3,     NSAA=SP
+		 double %vfp4,     ; --> D4,     NSAA=SP
+		 double %vfp5,     ; --> D5,     NSAA=SP
+		 double %vfp6,     ; --> D6,     NSAA=SP
+		 double %vfp7,     ; --> D7,     NSAA=SP
+		 double %vfp8,     ; --> SP,     NSAA=SP+8 (!)
+                 i32 %p0,          ; --> R0,     NSAA=SP+8 
+		 %st_t* byval %p1, ; --> R1, R2, NSAA=SP+8
+		 i32 %p2,          ; --> R3,     NSAA=SP+8 
+                 i32 %p3) #0 {     ; --> SP+8,   NSAA=SP+12 (SP..SP+7 holds %vfp8)
+entry:
+  ;CHECK: sub sp, #8
+  ;CHECK: push.w {r11, lr}
+  ;CHECK: add r0, sp, #16
+  ;CHECK: str r2, [sp, #20]
+  ;CHECK: str r1, [sp, #16]
+  ;CHECK: bl  fooUseStruct
+  call void @fooUseStruct(%st_t* %p1)
+  ret void
+}
+
+define void @doFoo() {
+entry:
+  call void @foo(double 23.0,
+                 double 23.1,
+                 double 23.2,
+                 double 23.3,
+                 double 23.4,
+                 double 23.5,
+                 double 23.6,
+                 double 23.7,
+                 double 23.8,
+                 i32 0, %st_t* byval @static_val, i32 1, i32 2)
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
new file mode 100644
index 0000000..212bbc2
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -0,0 +1,45 @@
+;Check AAPCS, 5.5 Parameter Passing, C.4 and C.5 rules.
+;Check case when NSAA != SP, and NCRN < R4, NCRN+ParamSize > R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32, i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878}
+
+define void @foo(double %vfp0,     ; --> D0,              NSAA=SP
+                 double %vfp1,     ; --> D1,              NSAA=SP
+		 double %vfp2,     ; --> D2,              NSAA=SP
+		 double %vfp3,     ; --> D3,              NSAA=SP
+		 double %vfp4,     ; --> D4,              NSAA=SP
+		 double %vfp5,     ; --> D5,              NSAA=SP
+		 double %vfp6,     ; --> D6,              NSAA=SP
+		 double %vfp7,     ; --> D7,              NSAA=SP
+		 double %vfp8,     ; --> SP,              NSAA=SP+8 (!)
+                 i32 %p0,          ; --> R0,              NSAA=SP+8 
+		 %st_t* byval %p1, ; --> SP+8, 4 words    NSAA=SP+24
+		 i32 %p2) #0 {     ; --> SP+24,           NSAA=SP+28
+                 
+entry:
+  ;CHECK:  push.w {r11, lr}
+  ;CHECK:  ldr    r0, [sp, #32]
+  ;CHECK:  bl     fooUseI32
+  call void @fooUseI32(i32 %p2)
+  ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+  call void @foo(double 23.0,
+                 double 23.1,
+                 double 23.2,
+                 double 23.3,
+                 double 23.4,
+                 double 23.5,
+                 double 23.6,
+                 double 23.7,
+                 double 23.8,
+                 i32 0, %st_t* byval @static_val, i32 1)
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
new file mode 100644
index 0000000..abc6e0d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; rdar://13782395
+
+define i32 @t1(i32 %a, i32 %b, i8** %retaddr) {
+; CHECK: t1:
+; CHECK: Block address taken
+; CHECK-NOT: Address of block that was removed by CodeGen
+  store i8* blockaddress(@t1, %cond_true), i8** %retaddr
+  %tmp2 = icmp eq i32 %a, 0
+  br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+  %tmp5 = add i32 %b, 1
+  ret i32 %tmp5
+
+cond_false:
+  %tmp7 = add i32 %b, -1
+  ret i32 %tmp7
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d, i8** %retaddr) {
+; CHECK: t2:
+; CHECK: Block address taken
+; CHECK: %cond_true
+; CHECK: add
+; CHECK: bx lr
+  store i8* blockaddress(@t2, %cond_true), i8** %retaddr
+  %tmp2 = icmp sgt i32 %c, 10
+  %tmp5 = icmp slt i32 %d, 4
+  %tmp8 = and i1 %tmp5, %tmp2
+  %tmp13 = add i32 %b, %a
+  br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:
+  %tmp15 = add i32 %tmp13, %c
+  %tmp1821 = sub i32 %tmp15, %d
+  ret i32 %tmp1821
+
+UnifiedReturnBlock:
+  ret i32 %tmp13
+}
+
+define hidden fastcc void @t3(i8** %retaddr) {
+; CHECK: t3:
+; CHECK: Block address taken
+; CHECK-NOT: Address of block that was removed by CodeGen
+bb:
+  store i8* blockaddress(@t3, %KBBlockZero_return_1), i8** %retaddr
+  br i1 undef, label %bb77, label %bb7.i
+
+bb7.i:                                            ; preds = %bb
+  br label %bb2.i
+
+KBBlockZero_return_1:                             ; preds = %KBBlockZero.exit
+  unreachable
+
+KBBlockZero_return_0:                             ; preds = %KBBlockZero.exit
+  unreachable
+
+bb77:                                             ; preds = %bb
+  ret void
+
+bb2.i:                                            ; preds = %bb6.i350, %bb7.i
+  br i1 undef, label %bb6.i350, label %KBBlockZero.exit
+
+bb6.i350:                                         ; preds = %bb2.i
+  br label %bb2.i
+
+KBBlockZero.exit:                                 ; preds = %bb2.i
+  indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
+}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index c5d00a0..c14f530 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -91,7 +91,7 @@ entry:
 ; CHECK: t4
 ; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: if.then
-; CHECK-NOT movs
+; CHECK-NOT: movs
   %0 = load double* %q, align 4
   %cmp = fcmp olt double %0, 1.000000e+01
   %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
index 769ba55..fbc25b4 100644
--- a/test/CodeGen/ARM/commute-movcc.ll
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -32,7 +32,7 @@ for.body:                                         ; preds = %entry, %if.end8
   %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
   %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
   %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
-  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %0 = load i32* %arrayidx, align 4
   %mul = mul i32 %0, %0
   %sub = add nsw i32 %i.012, -5
   %cmp2 = icmp eq i32 %sub, %Pref
@@ -53,7 +53,7 @@ if.else:                                          ; preds = %for.body
 if.end8:                                          ; preds = %if.else, %if.then
   %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
   %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
-  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %mul, i32* %arrayidx, align 4
   %inc = add i32 %i.012, 1
   %cmp = icmp eq i32 %inc, 11
   br i1 %cmp, label %for.end, label %for.body
@@ -61,7 +61,3 @@ if.end8:                                          ; preds = %if.else, %if.then
 for.end:                                          ; preds = %if.end8
   ret i32 %BestIdx.1
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
new file mode 100644
index 0000000..e9e0fe3
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
+
+; PR15525
+; CHECK: test1:
+; CHECK: ldr.w	[[REG:r[0-9]+]], [sp]
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r1, r2
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r3, [[REG]]
+; CHECK-NEXT: vst1.8	{{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+; CHECK-NEXT: bx	lr
+define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
+bb:
+  %tmp = extractvalue [4 x i64] %vec.coerce, 0
+  %tmp2 = bitcast i64 %tmp to <8 x i8>
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp4 = extractvalue [4 x i64] %vec.coerce, 1
+  %tmp5 = bitcast i64 %tmp4 to <8 x i8>
+  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 33c8e9d..c162260 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -31,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 95e6cf2..38945ac 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -40,7 +40,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index e3e4d06..e4040fa 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -60,7 +60,7 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 038c229..1de6ffa 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index f3af0b9..1868942 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -65,7 +65,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ae02a24..ba83f79 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -41,7 +41,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
index c42839d..4c92a29 100644
--- a/test/CodeGen/ARM/ehabi-filters.ll
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -19,7 +19,7 @@ define i32 @main() {
 entry:
   %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
   %0 = bitcast i8* %exception.i to i32*
-  store i32 42, i32* %0, align 4, !tbaa !0
+  store i32 42, i32* %0, align 4
   invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
           to label %unreachable.i unwind label %lpad.i
 
@@ -71,7 +71,3 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
 declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
new file mode 100644
index 0000000..11f3e6d
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+define void @_Z4testv() {
+entry:
+  tail call void @_Z15throw_exceptionv()
+  ret void
+}
+
+declare void @_Z15throw_exceptionv()
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 80849b80
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0808480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
new file mode 100644
index 0000000..79dba08
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM-RELOC
+
+define i32 @_Z3addiiiiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  %add2 = add nsw i32 %add1, %d
+  tail call void @_Z15throw_exceptioni(i32 %add2)
+  %add3 = add nsw i32 %f, %e
+  %add4 = add nsw i32 %add3, %g
+  %add5 = add nsw i32 %add4, %h
+  tail call void @_Z15throw_exceptioni(i32 %add5)
+  %add6 = add nsw i32 %add5, %add2
+  ret i32 %add6
+}
+
+declare void @_Z15throw_exceptioni(i32)
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK: section .ARM.extab
+; CHECK-NEXT: 0000 419b0181 b0b08384
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 00000000
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0838480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr1
+
+; CHECK-FP-ELIM-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
index 5e4b509..616aa1b 100644
--- a/test/CodeGen/ARM/ehabi-mc-section-group.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll
@@ -8,7 +8,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s -sd \
 ; RUN:   | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
@@ -68,12 +68,21 @@ declare void @__cxa_end_catch()
 
 declare void @_ZSt9terminatev()
 
-; CHECK:      # Section 1
-; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group'
-; CHECK:       ('_section_data', '01000000 0a000000 0c000000 0e000000')
-; CHECK:      # Section 10
-; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 12
-; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 14
-; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .group (47)
+; CHECK:          SectionData (
+; CHECK-NEXT:       0000: 01000000 09000000 0B000000 0D000000
+; CHECK-NEXT:     )
+
+; CHECK:        Section {
+; CHECK:          Index: 9
+; CHECK-NEXT:     Name: .text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (214)
+
+; CHECK:        Section {
+; CHECK:          Index: 11
+; CHECK-NEXT:     Name: .ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (204)
+
+; CHECK:        Section {
+; CHECK:          Index: 13
+; CHECK-NEXT:     Name: .ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (90)
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
index fc51b24..4e6e468 100644
--- a/test/CodeGen/ARM/ehabi-mc-section.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .test_section
 ; CHECK: section .ARM.extab.test_section
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx.test_section
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .test_section
+; CHECK-FP-ELIM: section .ARM.extab.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
index f90e5f3..ac0a0fc 100644
--- a/test/CodeGen/ARM/ehabi-mc-sh_link.ll
+++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s \
 ; RUN:   | FileCheck %s
 
 define void @test1() nounwind {
@@ -20,28 +20,39 @@ entry:
   ret void
 }
 
-; CHECK: # Section 1
-; CHECK-NEXT: (('sh_name', 0x00000010) # '.text'
-
-; CHECK:      (('sh_name', 0x00000005) # '.ARM.exidx'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x0000005c)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000001)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
-
-; CHECK: # Section 7
-; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section'
-
-; CHECK:      (('sh_name', 0x0000002f) # '.ARM.exidx.test_section'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x00000068)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000007)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
+; CHECK:      Sections [
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .text (16)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx (5)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x5C
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 1
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
+
+; CHECK:        Section {
+; CHECK:          Index: 7
+; CHECK-NEXT:     Name: .test_section (57)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx.test_section (47)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x68
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 7
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
index 0dc2ef7..83b8425 100644
--- a/test/CodeGen/ARM/ehabi-mc.ll
+++ b/test/CodeGen/ARM/ehabi-mc.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .text
 ; CHECK: section .ARM.extab
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM: section .ARM.extab
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll
new file mode 100644
index 0000000..0002711
--- /dev/null
+++ b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s
+
+; This test makes sure spills of 64-bit pairs in Thumb mode actually
+; generate Thumb instructions. Previously we were inserting an ARM
+; STMIA which happened to have the same encoding.
+
+define void @foo(i64* %addr) {
+  %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+
+  ; Make sure we are actually creating the Thumb versions of the spill
+  ; instructions.
+; CHECK: t2STRDi8
+; CHECK: t2LDRDi8
+
+  store volatile i64 %val1, i64* %addr
+  store volatile i64 %val2, i64* %addr
+  store volatile i64 %val3, i64* %addr
+  store volatile i64 %val4, i64* %addr
+  store volatile i64 %val5, i64* %addr
+  store volatile i64 %val6, i64* %addr
+  store volatile i64 %val7, i64* %addr
+  ret void
+}
diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll
new file mode 100644
index 0000000..ef3e5a5
--- /dev/null
+++ b/test/CodeGen/ARM/gpr-paired-spill.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+
+define void @foo(i64* %addr) {
+  %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+
+  ; Key point is that enough 64-bit paired GPR values are live that
+  ; one of them has to be spilled. This used to cause an abort because
+  ; an LDMIA was created with both a FrameIndex and an offset, which
+  ; is not allowed.
+
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
+  ; We also want to ensure the register scavenger is working (i.e. an
+  ; offset from sp can be generated), so we need two spills.
+; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
+; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+
+  ; In principle LLVM may have to recalculate the offset. At the moment
+  ; it reuses the original though.
+; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+
+  store volatile i64 %val1, i64* %addr
+  store volatile i64 %val2, i64* %addr
+  store volatile i64 %val3, i64* %addr
+  store volatile i64 %val4, i64* %addr
+  store volatile i64 %val5, i64* %addr
+  store volatile i64 %val6, i64* %addr
+  store volatile i64 %val7, i64* %addr
+  ret void
+}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 5b4cf9d..9b0f3e5 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -26,8 +26,8 @@ outer.loop:                                 ; preds = %for.inc69, %entry
   %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
   %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
   %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
-  %tmp5 = load i64* %offset, align 4, !tbaa !0
-  %tmp15 = load i64* %len, align 4, !tbaa !0
+  %tmp5 = load i64* %offset, align 4
+  %tmp15 = load i64* %len, align 4
   %add = add nsw i64 %tmp15, %tmp5
   br label %inner.loop
 
@@ -40,8 +40,8 @@ inner.loop:                                       ; preds = %for.inc, %outer.loo
 if.end:                                           ; preds = %inner.loop
   %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
   %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
-  %tmp29 = load i64* %offset28, align 4, !tbaa !0
-  %tmp40 = load i64* %len39, align 4, !tbaa !0
+  %tmp29 = load i64* %offset28, align 4
+  %tmp40 = load i64* %len39, align 4
   %add41 = add nsw i64 %tmp40, %tmp29
   %cmp44 = icmp sge i64 %tmp29, %tmp5
   %cmp47 = icmp slt i64 %tmp29, %add
@@ -74,7 +74,3 @@ for.end72:                                        ; preds = %for.inc69, %entry
   %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
   ret i32 %overlap.0.lcssa
 }
-
-!0 = metadata !{metadata !"long long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
new file mode 100644
index 0000000..4b15326
--- /dev/null
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; Loop counter copies should be eliminated.
+; There is also a MUL here, but we don't care where it is scheduled.
+; CHECK: postinc
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: t2LDRs
+; CHECK: t2ADDrr
+; CHECK: t2CMPrr
+; CHECK: COPY
+define i32 @postinc(i32 %a, i32* nocapture %d, i32 %s) nounwind {
+entry:
+  %cmp4 = icmp eq i32 %a, 0
+  br i1 %cmp4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ]
+  %indvars.iv.next = add i32 %indvars.iv, %s
+  %arrayidx = getelementptr inbounds i32* %d, i32 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, %s.05
+  %exitcond = icmp eq i32 %indvars.iv.next, %a
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %s.0.lcssa = phi i32 [ 0, %entry ], [ %mul, %for.body ]
+  ret i32 %s.0.lcssa
+}
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
new file mode 100644
index 0000000..bf2770b
--- /dev/null
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %a) nounwind {
+; CHECK: test1:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <4 x i32> @test2(<4 x i32> %a) nounwind {
+; CHECK: test2:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sge <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <8 x i16> @test3(<8 x i16> %a) nounwind {
+; CHECK: test3:
+; CHECK: vabs.s16 q
+        %tmp1neg = sub <8 x i16> zeroinitializer, %a
+        %b = icmp sgt <8 x i16> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
+        ret <8 x i16> %abs
+}
+
+define <16 x i8> @test4(<16 x i8> %a) nounwind {
+; CHECK: test4:
+; CHECK: vabs.s8 q
+        %tmp1neg = sub <16 x i8> zeroinitializer, %a
+        %b = icmp slt <16 x i8> %a, zeroinitializer
+        %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
+        ret <16 x i8> %abs
+}
+
+define <4 x i32> @test5(<4 x i32> %a) nounwind {
+; CHECK: test5:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sle <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
+        ret <4 x i32> %abs
+}
+
+define <2 x i32> @test6(<2 x i32> %a) nounwind {
+; CHECK: test6:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+        %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
+        ret <2 x i32> %abs
+}
+
+define <2 x i32> @test7(<2 x i32> %a) nounwind {
+; CHECK: test7:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sge <2 x i32> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
+        ret <2 x i32> %abs
+}
+
+define <4 x i16> @test8(<4 x i16> %a) nounwind {
+; CHECK: test8:
+; CHECK: vabs.s16 d
+        %tmp1neg = sub <4 x i16> zeroinitializer, %a
+        %b = icmp sgt <4 x i16> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
+        ret <4 x i16> %abs
+}
+
+define <8 x i8> @test9(<8 x i8> %a) nounwind {
+; CHECK: test9:
+; CHECK: vabs.s8 d
+        %tmp1neg = sub <8 x i8> zeroinitializer, %a
+        %b = icmp slt <8 x i8> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
+        ret <8 x i8> %abs
+}
+
+define <2 x i32> @test10(<2 x i32> %a) nounwind {
+; CHECK: test10:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sle <2 x i32> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a
+        ret <2 x i32> %abs
+}
diff --git a/test/CodeGen/ARM/nop_concat_vectors.ll b/test/CodeGen/ARM/nop_concat_vectors.ll
new file mode 100644
index 0000000..c810900
--- /dev/null
+++ b/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+;CHECK: _foo
+;CHECK-NOT: vld1.32
+;CHECK-NOT: vst1.32
+;CHECK: bx
+define void @foo(<16 x i8>* %J) {
+  %A = load <16 x i8>* %J
+  %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %T2 = shufflevector <8 x i8>  %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <16 x i8> %T2, <16 x i8>* %J
+  ret void
+}
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index f93ffe7..94578d8 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,10 +1,11 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
-; RUN: grep .Lfoo: %t
-; RUN: egrep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep long.*\.Lbaz %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; CHECK: .Lfoo:
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: .long .Lbaz
+; CHECK: .Lbaz:
 
 define private void @foo() {
         ret void
diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll
new file mode 100644
index 0000000..670b12f
--- /dev/null
+++ b/test/CodeGen/ARM/returned-ext.ll
@@ -0,0 +1,178 @@
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+
+declare i16 @identity16(i16 returned %x)
+declare i32 @identity32(i32 returned %x)
+declare zeroext i16 @retzext16(i16 returned %x)
+declare i16 @paramzext16(i16 zeroext returned %x)
+declare zeroext i16 @bothzext16(i16 zeroext returned %x)
+
+; The zeroext param attribute below is meant to have no effect
+define i16 @test_identity(i16 zeroext %x) {
+entry:
+; CHECKELF: test_identity:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl identity16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_identity:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _identity16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @identity16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+; FIXME: This ought not to require register saving but currently does because
+; %x is not considered equal to %call (see SelectionDAGBuilder.cpp).
+define i16 @test_matched_ret(i16 %x) {
+entry:
+; CHECKELF: test_matched_ret:
+
+; This shouldn't be required
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKELF: bl retzext16
+; CHECKELF-NOT: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+
+; This shouldn't be required
+; CHECKELF: mov r0, [[SAVEX]]
+
+; CHECKT2D: test_matched_ret:
+
+; This shouldn't be required
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKT2D: blx _retzext16
+; CHECKT2D-NOT: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+
+; This shouldn't be required
+; CHECKT2D: mov r0, [[SAVEX]]
+
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+define i16 @test_mismatched_ret(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_ret:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl retzext16
+; CHECKELF: sxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_ret:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _retzext16
+; CHECKT2D: sxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = sext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+define i16 @test_matched_paramext(i16 %x) {
+entry:
+; CHECKELF: test_matched_paramext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl paramzext16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+; FIXME: This theoretically ought to optimize to the exact same output as the
+; version above, but currently doesn't (see SelectionDAGBuilder.cpp).
+define i16 @test_matched_paramext2(i16 %x) {
+entry:
+
+; Without the 'returned' optimization there doesn't seem to be an unambiguous
+; optimal selection and scheduling of the uxth and mov instructions below, so
+; don't bother checking them: just verify that the calls are made in the
+; correct order as a basic sanity check.
+
+; CHECKELF: test_matched_paramext2:
+; CHECKELF: bl paramzext16
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext2:
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+
+; It should make no difference if %x is used below rather than %call, but it does.
+  %b = zext i16 %x to i32
+
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+define i16 @test_matched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_matched_bothext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl bothzext16
+; CHECKELF-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKELF: bl identity32
+
+; CHECKT2D: test_matched_bothext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _bothzext16
+; CHECKT2D-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKT2D: blx _identity32
+
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = zext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %call
+}
+
+define i16 @test_mismatched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_bothext:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl bothzext16
+; CHECKELF: sxth r0, [[SAVEX]]
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_bothext:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _bothzext16
+; CHECKT2D: sxth r0, [[SAVEX]]
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = sext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
index e015bf0..eb4d0ba 100644
--- a/test/CodeGen/ARM/tail-dup.ll
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -11,19 +11,19 @@
 
 define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
 entry:
-  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %0 = load i32* %opcodes, align 4
   %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
   br label %indirectgoto
 
 INCREMENT:                                        ; preds = %indirectgoto
   %inc = add nsw i32 %result.0, 1
-  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %1 = load i32* %opcodes.addr.0, align 4
   %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
   br label %indirectgoto
 
 DECREMENT:                                        ; preds = %indirectgoto
   %dec = add nsw i32 %result.0, -1
-  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %2 = load i32* %opcodes.addr.0, align 4
   %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
   br label %indirectgoto
 
@@ -38,7 +38,3 @@ indirectgoto:                                     ; preds = %DECREMENT, %INCREME
 RETURN:                                           ; preds = %indirectgoto
   ret i32 %result.0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
new file mode 100644
index 0000000..f06e4a4
--- /dev/null
+++ b/test/CodeGen/ARM/this-return.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+
+%struct.A = type { i8 }
+%struct.B = type { i32 }
+%struct.C = type { %struct.B }
+%struct.D = type { %struct.B }
+%struct.E = type { %struct.B, %struct.B }
+
+declare %struct.A* @A_ctor_base(%struct.A* returned)
+declare %struct.B* @B_ctor_base(%struct.B* returned, i32)
+declare %struct.B* @B_ctor_complete(%struct.B* returned, i32)
+
+declare %struct.A* @A_ctor_base_nothisret(%struct.A*)
+declare %struct.B* @B_ctor_base_nothisret(%struct.B*, i32)
+declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32)
+
+define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_base:
+; CHECKELF-NOT: mov {{r[0-9]+}}, r0
+; CHECKELF: bl A_ctor_base
+; CHECKELF-NOT: mov r0, {{r[0-9]+}}
+; CHECKELF: b B_ctor_base
+; CHECKT2D: C_ctor_base:
+; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
+; CHECKT2D: blx _A_ctor_base
+; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
+; CHECKT2D: b.w _B_ctor_base
+  %0 = bitcast %struct.C* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_base_nothisret:
+; CHECKELF: mov [[SAVETHIS:r[0-9]+]], r0
+; CHECKELF: bl A_ctor_base_nothisret
+; CHECKELF: mov r0, [[SAVETHIS]]
+; CHECKELF-NOT: b B_ctor_base_nothisret
+; CHECKT2D: C_ctor_base_nothisret:
+; CHECKT2D: mov [[SAVETHIS:r[0-9]+]], r0
+; CHECKT2D: blx _A_ctor_base_nothisret
+; CHECKT2D: mov r0, [[SAVETHIS]]
+; CHECKT2D-NOT: b.w _B_ctor_base_nothisret
+  %0 = bitcast %struct.C* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_complete:
+; CHECKELF: b C_ctor_base
+; CHECKT2D: C_ctor_complete:
+; CHECKT2D: b.w _C_ctor_base
+  %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_complete_nothisret:
+; CHECKELF-NOT: b C_ctor_base_nothisret
+; CHECKT2D: C_ctor_complete_nothisret:
+; CHECKT2D-NOT: b.w _C_ctor_base_nothisret
+  %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) {
+entry:
+; CHECKELF: D_ctor_base:
+; CHECKELF-NOT: mov {{r[0-9]+}}, r0
+; CHECKELF: bl B_ctor_complete
+; CHECKELF-NOT: mov r0, {{r[0-9]+}}
+; CHECKELF: b B_ctor_complete
+; CHECKT2D: D_ctor_base:
+; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
+; CHECKT2D: blx _B_ctor_complete
+; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
+; CHECKT2D: b.w _B_ctor_complete
+  %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  ret %struct.D* %this
+}
+
+define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) {
+entry:
+; CHECKELF: E_ctor_base:
+; CHECKELF-NOT: b B_ctor_complete
+; CHECKT2D: E_ctor_base:
+; CHECKT2D-NOT: b.w _B_ctor_complete
+  %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+  ret %struct.E* %this
+}
diff --git a/test/CodeGen/ARM/v1-constant-fold.ll b/test/CodeGen/ARM/v1-constant-fold.ll
new file mode 100644
index 0000000..b86d5db
--- /dev/null
+++ b/test/CodeGen/ARM/v1-constant-fold.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon  | FileCheck %s
+
+; PR15611. Check that we don't crash when constant folding v1i32 types.
+
+; CHECK: foo:
+define void @foo(i32 %arg) {
+bb:
+  %tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
+  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 1
+  %tmp2 = insertelement <4 x i32> %tmp1, i32 0, i32 2
+  %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3
+  %tmp4 = add <4 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK:  bl bar
+  tail call void @bar(<4 x i32> %tmp4)
+  ret void
+}
+
+declare void @bar(<4 x i32>)
diff --git a/test/CodeGen/ARM/vcvt-cost.ll b/test/CodeGen/ARM/vcvt-cost.ll
new file mode 100644
index 0000000..0d45c40
--- /dev/null
+++ b/test/CodeGen/ARM/vcvt-cost.ll
@@ -0,0 +1,153 @@
+; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
+; instructions as expensive. If lowering is improved, the cost model needs to
+; change. Only the COST prefix is run below, so the CHECK lines are inactive.
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
+%T0_5 = type <8 x i8>
+%T1_5 = type <8 x i32>
+; CHECK: func_cvt5:
+define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
+; CHECK: vmovl.s8
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+  %v0 = load %T0_5* %loadaddr
+; COST: func_cvt5
+; COST: cost of 3 {{.*}} sext
+  %r = sext %T0_5 %v0 to %T1_5
+  store %T1_5 %r, %T1_5* %storeaddr
+  ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved the cost needs to change.
+%TA0_5 = type <8 x i8>
+%TA1_5 = type <8 x i32>
+; CHECK: func_cvt1:
+define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
+; CHECK: vmovl.u8
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+  %v0 = load %TA0_5* %loadaddr
+; COST: func_cvt1
+; COST: cost of 3 {{.*}} zext
+  %r = zext %TA0_5 %v0 to %TA1_5
+  store %TA1_5 %r, %TA1_5* %storeaddr
+  ret void
+}
+
+%T0_51 = type <8 x i32>
+%T1_51 = type <8 x i8>
+; CHECK: func_cvt51:
+define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i16
+  %v0 = load %T0_51* %loadaddr
+; COST: func_cvt51
+; COST: cost of 3 {{.*}} trunc
+  %r = trunc %T0_51 %v0 to %T1_51
+  store %T1_51 %r, %T1_51* %storeaddr
+  ret void
+}
+
+%TT0_5 = type <16 x i8>
+%TT1_5 = type <16 x i32>
+; CHECK: func_cvt52:
+define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+  %v0 = load %TT0_5* %loadaddr
+; COST: func_cvt52
+; COST: cost of 6 {{.*}} sext
+  %r = sext %TT0_5 %v0 to %TT1_5
+  store %TT1_5 %r, %TT1_5* %storeaddr
+  ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved the cost needs to change.
+%TTA0_5 = type <16 x i8>
+%TTA1_5 = type <16 x i32>
+; CHECK: func_cvt12:
+define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+  %v0 = load %TTA0_5* %loadaddr
+; COST: func_cvt12
+; COST: cost of 6 {{.*}} zext
+  %r = zext %TTA0_5 %v0 to %TTA1_5
+  store %TTA1_5 %r, %TTA1_5* %storeaddr
+  ret void
+}
+
+%TT0_51 = type <16 x i32>
+%TT1_51 = type <16 x i8>
+; CHECK: func_cvt512:
+define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i16
+; CHECK: vmovn.i16
+  %v0 = load %TT0_51* %loadaddr
+; COST: func_cvt512
+; COST: cost of 6 {{.*}} trunc
+  %r = trunc %TT0_51 %v0 to %TT1_51
+  store %TT1_51 %r, %TT1_51* %storeaddr
+  ret void
+}
+
+; CHECK: sext_v4i16_v4i64:
+define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: sext_v4i16_v4i64
+; COST: cost of 3 {{.*}} sext
+  %r = sext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v4i16_v4i64:
+define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: zext_v4i16_v4i64
+; COST: cost of 3 {{.*}} zext
+  %r = zext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: sext_v8i16_v8i64:
+define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: sext_v8i16_v8i64
+; COST: cost of 6 {{.*}} sext
+  %r = sext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v8i16_v8i64:
+define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: zext_v8i16_v8i64
+; COST: cost of 6 {{.*}} zext
+  %r = zext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index e67b478..c078f49 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -156,175 +156,3 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
 
 declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
-
-; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
-; instructions as expensive. If lowering is improved the cost model needs to
-; change.
-; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
-%T0_5 = type <8 x i8>
-%T1_5 = type <8 x i32>
-; CHECK: func_cvt5:
-define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
-; CHECK: vmovl.s8
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-  %v0 = load %T0_5* %loadaddr
-; COST: func_cvt5
-; COST: cost of 3 {{.*}} sext
-  %r = sext %T0_5 %v0 to %T1_5
-  store %T1_5 %r, %T1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TA0_5 = type <8 x i8>
-%TA1_5 = type <8 x i32>
-; CHECK: func_cvt1:
-define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
-; CHECK: vmovl.u8
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-  %v0 = load %TA0_5* %loadaddr
-; COST: func_cvt1
-; COST: cost of 3 {{.*}} zext
-  %r = zext %TA0_5 %v0 to %TA1_5
-  store %TA1_5 %r, %TA1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%T0_51 = type <8 x i32>
-%T1_51 = type <8 x i8>
-; CHECK: func_cvt51:
-define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-  %v0 = load %T0_51* %loadaddr
-; COST: func_cvt51
-; COST: cost of 19 {{.*}} trunc
-  %r = trunc %T0_51 %v0 to %T1_51
-  store %T1_51 %r, %T1_51* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TT0_5 = type <16 x i8>
-%TT1_5 = type <16 x i32>
-; CHECK: func_cvt52:
-define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-  %v0 = load %TT0_5* %loadaddr
-; COST: func_cvt52
-; COST: cost of 6 {{.*}} sext
-  %r = sext %TT0_5 %v0 to %TT1_5
-  store %TT1_5 %r, %TT1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TTA0_5 = type <16 x i8>
-%TTA1_5 = type <16 x i32>
-; CHECK: func_cvt12:
-define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-  %v0 = load %TTA0_5* %loadaddr
-; COST: func_cvt12
-; COST: cost of 6 {{.*}} zext
-  %r = zext %TTA0_5 %v0 to %TTA1_5
-  store %TTA1_5 %r, %TTA1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TT0_51 = type <16 x i32>
-%TT1_51 = type <16 x i8>
-; CHECK: func_cvt512:
-define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-  %v0 = load %TT0_51* %loadaddr
-; COST: func_cvt512
-; COST: cost of 38 {{.*}} trunc
-  %r = trunc %TT0_51 %v0 to %TT1_51
-  store %TT1_51 %r, %TT1_51* %storeaddr
-  ret void
-}
-
-; CHECK: sext_v4i16_v4i64:
-define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-  %v0 = load <4 x i16>* %loadaddr
-; COST: sext_v4i16_v4i64
-; COST: cost of 3 {{.*}} sext
-  %r = sext <4 x i16> %v0 to <4 x i64>
-  store <4 x i64> %r, <4 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: zext_v4i16_v4i64:
-define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-  %v0 = load <4 x i16>* %loadaddr
-; COST: zext_v4i16_v4i64
-; COST: cost of 3 {{.*}} zext
-  %r = zext <4 x i16> %v0 to <4 x i64>
-  store <4 x i64> %r, <4 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: sext_v8i16_v8i64:
-define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-  %v0 = load <8 x i16>* %loadaddr
-; COST: sext_v8i16_v8i64
-; COST: cost of 6 {{.*}} sext
-  %r = sext <8 x i16> %v0 to <8 x i64>
-  store <8 x i64> %r, <8 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: zext_v8i16_v8i64:
-define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-  %v0 = load <8 x i16>* %loadaddr
-; COST: zext_v8i16_v8i64
-; COST: cost of 6 {{.*}} zext
-  %r = zext <8 x i16> %v0 to <8 x i64>
-  store <8 x i64> %r, <8 x i64>* %storeaddr
-  ret void
-}
-
diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll
index 3009e50..07ba230 100644
--- a/test/CodeGen/ARM/vcvt_combine.ll
+++ b/test/CodeGen/ARM/vcvt_combine.ll
@@ -7,7 +7,7 @@
 ; CHECK-NOT: vmul
 define void @t0() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -23,7 +23,7 @@ declare void @foo_int32x2_t(<2 x i32>)
 ; CHECK-NOT: vmul
 define void @t1() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -39,7 +39,7 @@ declare void @foo_uint32x2_t(<2 x i32>)
 ; CHECK: vmul
 define void @t2() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
@@ -53,7 +53,7 @@ entry:
 ; CHECK: vmul
 define void @t3() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -67,7 +67,7 @@ entry:
 ; CHECK-NOT: vmul
 define void @t4() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -81,7 +81,7 @@ entry:
 ; CHECK-NOT: vmul
 define void @t5() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
@@ -93,7 +93,3 @@ entry:
 }
 
 declare void @foo_int32x4_t(<4 x i32>)
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 7fddbed..e6f1338 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -11,7 +11,7 @@ declare void @foo_int32x4_t(<4 x i32>)
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -27,7 +27,7 @@ declare void @foo_float32x2_t(<2 x float>)
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
-  %tmp = load i32* @uin, align 4, !tbaa !3
+  %tmp = load i32* @uin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -41,7 +41,7 @@ entry:
 ; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -55,7 +55,7 @@ entry:
 ; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -69,7 +69,7 @@ entry:
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -83,7 +83,7 @@ entry:
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
@@ -95,8 +95,3 @@ entry:
 }
 
 declare void @foo_float32x4_t(<4 x float>)
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 74628f0..eb5ad8f 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -599,3 +599,27 @@ for.end179:                                       ; preds = %for.cond.loopexit,
 declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+
+; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8),
+; creating an illegal type during legalization and causing an assertion
+; failure. PR15970
+define void @no_illegal_types_vmull_sext(<4 x i32> %a) {
+entry:
+  %wide.load283.i = load <4 x i8>* undef, align 1
+  %0 = sext <4 x i8> %wide.load283.i to <4 x i32>
+  %1 = sub nsw <4 x i32> %0, %a
+  %2 = mul nsw <4 x i32> %1, %1
+  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+  ret void
+}
+define void @no_illegal_types_vmull_zext(<4 x i32> %a) {
+entry:
+  %wide.load283.i = load <4 x i8>* undef, align 1
+  %0 = zext <4 x i8> %wide.load283.i to <4 x i32>
+  %1 = sub nsw <4 x i32> %0, %a
+  %2 = mul nsw <4 x i32> %1, %1
+  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+  ret void
+}