Diffstat (limited to 'test/CodeGen/ARM')
149 files changed, 4493 insertions, 964 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 78c6222..94c562b 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry
 bb88.i: ; preds = %bb74.i
 	br i1 false, label %mandel.exit, label %bb74.i
 mandel.exit: ; preds = %bb88.i
-	%tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+	%tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
 	%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
 	%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 8bde748..a016809 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -37,10 +37,11 @@ return: ; preds = %invcont
 	ret void
 
 lpad: ; preds = %entry
-	%eh_ptr = call i8* @llvm.eh.exception()
+	%exn = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	         cleanup
+	%eh_ptr = extractvalue {i8*, i32} %exn, 0
 	store i8* %eh_ptr, i8** %eh_exception
-	%eh_ptr1 = load i8** %eh_exception
-	%eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+	%eh_select2 = extractvalue {i8*, i32} %exn, 1
 	store i32 %eh_select2, i32* %eh_selector
 	br label %ppad
 
@@ -94,10 +95,6 @@ declare void @_ZdlPv(i8*) nounwind
 
 declare void @_Z3barv()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gxx_personality_sj0(...)
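The hunk above shows the pattern repeated throughout this commit: the old @llvm.eh.exception / @llvm.eh.selector intrinsic pair is replaced by the first-class landingpad instruction, which yields the exception pointer and the type-info selector together as one {i8*, i32} value. A minimal sketch of the new-style form, mirroring the syntax in the hunks themselves (block and value names are illustrative only):

    lpad:
      %lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
              cleanup
      %exn.ptr = extractvalue { i8*, i32 } %lp, 0   ; exception pointer
      %exn.sel = extractvalue { i8*, i32 } %lp, 1   ; type-info selector
      resume { i8*, i32 } %lp                       ; re-raise once cleanup is done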
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 0a157c9..426bd17 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
 	br i1 %4, label %bb1, label %bb2
 
 bb1:
-;CHECK: vstrhi.64
+;CHECK: vstrhi
 	store double %1, double* %y
 	br label %bb2
 
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 8bfd026..eb9c2d0 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
 	%arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
 	%out_poly16_t = alloca i16 ; <i16*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
-; CHECK: vldr.64
+; CHECK: vldr
 	%0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
 	%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
 	store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 7aae3ac..a8afc20 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
 ; PR5423
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
 
 ; THUMB: t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB: str r2, [r1]
+; THUMB: str r1, [r0]
 	%0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
 	store i32 0, i32* inttoptr (i32 8 to i32*), align 8
 	br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index e47c038..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
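The NEONSpillCrash comment above refers to the NEON structured-load intrinsics, which return three d-register-sized values that the backend places in consecutive parts of one larger register; spilling such a value while only part of it has been defined is what used to crash. As a rough sketch in the IR dialect of this vintage (pointer and alignment operands per the era's intrinsic signature; value names are illustrative):

    %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %ptr, i32 1)
    %part0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %triple, 0

    declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly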
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index cd1c9c8..a400b7b 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -10,28 +10,28 @@ entry:
 	%4 = ashr i32 %3, 30
 	%.sum = add i32 %4, 4
 	%5 = getelementptr inbounds float* %table, i32 %.sum
-;CHECK: vldr.32 s
+;CHECK: vldr s
 	%6 = load float* %5, align 4
 	%tmp11 = insertelement <4 x float> undef, float %6, i32 0
 	%7 = shl i32 %packedValue, 18
 	%8 = ashr i32 %7, 30
 	%.sum12 = add i32 %8, 4
 	%9 = getelementptr inbounds float* %table, i32 %.sum12
-;CHECK: vldr.32 s
+;CHECK: vldr s
 	%10 = load float* %9, align 4
 	%tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
 	%11 = shl i32 %packedValue, 20
 	%12 = ashr i32 %11, 30
 	%.sum13 = add i32 %12, 4
 	%13 = getelementptr inbounds float* %table, i32 %.sum13
-;CHECK: vldr.32 s
+;CHECK: vldr s
 	%14 = load float* %13, align 4
 	%tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
 	%15 = shl i32 %packedValue, 22
 	%16 = ashr i32 %15, 30
 	%.sum14 = add i32 %16, 4
 	%17 = getelementptr inbounds float* %table, i32 %.sum14
-;CHECK: vldr.32 s
+;CHECK: vldr s
 	%18 = load float* %17, align 4
 	%tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
 	%19 = fmul <4 x float> %tmp5, %2
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
 
 ; CHECK: vld1.64 {d16, d17}, [r{{.}}]
 ; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index c03c815..2842437 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -21,12 +21,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare void @_Unwind_SjLj_Resume(i8*)
@@ -75,8 +71,11 @@ try.cont: ; preds = %lpad
 	ret i32 %conv
 
 lpad: ; preds = %entry
-	%exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4]
-	%eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1]
+	%exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	           catch i8* bitcast (%0* @_ZTI1A to i8*)
+	           catch i8* null
+	%exn = extractvalue { i8*, i32 } %exn.ptr, 0
+	%eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
 	%2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1]
 	%3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1]
 	br i1 %3, label %try.cont, label %eh.resume
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index f57b7e6..4b47085 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -34,10 +34,12 @@ return: ; preds = %entry
 	ret void
 
 lpad: ; preds = %bb
-	%eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
-	store i8* %eh_ptr, i8** %eh_exception
+	%eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	          cleanup
+	%exn = extractvalue { i8*, i32 } %eh_ptr, 0
+	store i8* %exn, i8** %eh_exception
 	%eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1]
-	%eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1)
+	%eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
 	store i32 %eh_select14, i32* %eh_selector
 	br label %ppad
 
@@ -54,10 +56,6 @@ declare arm_apcscc void @func2()
 
 declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
 
 declare arm_apcscc void @func3()
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 0422094..ec74880 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=trivial
 ; RUN: llc < %s -verify-machineinstrs -spiller=inline
 ; PR8612
 ;
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
 ; rdar://8690640
 
 define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..770ad44 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,39 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
 ; rdar://8728956
 
 define hidden void @foo() nounwind ssp {
 entry:
 ; CHECK: foo:
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; CHECK: mov r7, sp
 ; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
-  %tmp40 = load <4 x i8>* undef
-  %tmp41 = extractelement <4 x i8> %tmp40, i32 2
-  %conv42 = zext i8 %tmp41 to i32
-  %conv43 = sitofp i32 %conv42 to float
-  %div44 = fdiv float %conv43, 2.560000e+02
-  %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
-  %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit46, <4 x float>* undef
-  br i1 undef, label %if.then105, label %if.else109
-
-if.then105: ; preds = %entry
-  br label %if.end114
-
-if.else109: ; preds = %entry
-  br label %if.end114
-
-if.end114: ; preds = %if.else109, %if.then105
-  %call185 = call float @bar()
-  %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
-  %call189 = call float @bar()
-  %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
-  %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit191, <4 x float>* undef
+  tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
 ; CHECK: vpop {d10, d11}
 ; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
   ret void
 }
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 9484212..ca88eed 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -3,11 +3,11 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
-@x1 = internal global i8 1
-@x2 = internal global i8 1
-@x3 = internal global i8 1
-@x4 = internal global i8 1
-@x5 = global i8 1
+@x1 = internal global i8 1, align 1
+@x2 = internal global i8 1, align 1
+@x3 = internal global i8 1, align 1
+@x4 = internal global i8 1, align 1
+@x5 = global i8 1, align 1
 
 ; Check debug info output for merged global.
 ; DW_AT_location
@@ -17,8 +17,7 @@ target triple = "thumbv7-apple-darwin10"
 ; DW_OP_constu
 ; offset
 
-;CHECK: .ascii "x2" @ DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset6
 ;CHECK-NEXT: @ DW_AT_type
 ;CHECK-NEXT: @ DW_AT_decl_file
 ;CHECK-NEXT: @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281e9..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
 
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index 0b5f962..d3394b5 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -35,14 +35,14 @@ for.cond.backedge:
 	br label %for.cond
 
 lpad:
-	%exn = tail call i8* @llvm.eh.exception() nounwind
-	%eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+	%exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	         catch i8* null
 	invoke void @foo()
 		to label %eh.resume unwind label %terminate.lpad
 
 lpad26:
-	%exn27 = tail call i8* @llvm.eh.exception() nounwind
-	%eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+	%exn27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	         catch i8* null
 	invoke void @foo()
 		to label %eh.resume unwind label %terminate.lpad
 
@@ -57,31 +57,26 @@ call8.i.i.i.noexc:
 	ret void
 
 lpad44:
-	%exn45 = tail call i8* @llvm.eh.exception() nounwind
-	%eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+	%exn45 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	         catch i8* null
 	invoke void @foo()
 		to label %eh.resume unwind label %terminate.lpad
 
 eh.resume:
-	%exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
-	tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
-	unreachable
+	%exn.slot.0 = phi { i8*, i32 } [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+	resume { i8*, i32 } %exn.slot.0
 
 terminate.lpad:
-	%exn51 = tail call i8* @llvm.eh.exception() nounwind
-	%eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+	%exn51 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+	         catch i8* null
 	tail call void @_ZSt9terminatev() noreturn nounwind
 	unreachable
 }
 
 declare void @foo()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
 
 declare void @_ZSt9terminatev()
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 1b5b8a9..091d037 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,12 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
 ; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
 
-%struct.config = type { i16, i16, i16, i16 }
-
 @prev = external global [0 x i16]
 @max_lazy_match = internal unnamed_addr global i32 0, align 4
 @read_buf = external global i32 (i8*, i32)*
 @window = external global [0 x i8]
 @lookahead = internal unnamed_addr global i32 0, align 4
-@eofile.b = internal unnamed_addr global i1 false
+@eofile.b = internal unnamed_addr global i32 0
 @ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index f681c34..f2b0c5d 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,8 +8,7 @@
 ; DW_OP_constu
 ; offset
 
-;CHECK: .ascii "x2" @ DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset33
 ;CHECK-NEXT: @ DW_AT_type
 ;CHECK-NEXT: @ DW_AT_decl_file
 ;CHECK-NEXT: @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; Test that ldmia_ret preserves implicit operands for return values.
 ;
 ; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
new file mode 100644
index 0000000..09db740
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -mattr=+neon,+neonfp -relocation-model=pic
+
+target triple = "armv6-none-linux-gnueabi"
+
+define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
+L.entry:
+  %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+  %1 = bitcast i16* %destValues to i8*
+  %2 = mul i32 %0, 6
+  %3 = getelementptr i8* %1, i32 %2
+  %4 = bitcast i8* %3 to <3 x i16>*
+  %5 = load <3 x i16>* %4, align 1
+  %6 = bitcast i16* %sourceA to i8*
+  %7 = getelementptr i8* %6, i32 %2
+  %8 = bitcast i8* %7 to <3 x i16>*
+  %9 = load <3 x i16>* %8, align 1
+  %10 = or <3 x i16> %9, %5
+  store <3 x i16> %10, <3 x i16>* %4, align 1
+  ret void
+}
+
+declare i32 @get_index(...)
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
new file mode 100644
index 0000000..ff049c8
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -0,0 +1,21 @@
+; Make sure short memsets on ARM lower to stores, even when optimizing for size.
+; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+; CHECK-GENERIC: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-UNALIGNED: strb
+; CHECK-UNALIGNED-NEXT: str
+define void @foo(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 0000000..42b1491
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK: vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
new file mode 100644
index 0000000..113cbfe
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@i8_res = global <2 x i8> <i8 0, i8 0>
+@i8_src1 = global <2 x i8> <i8 1, i8 2>
+@i8_src2 = global <2 x i8> <i8 2, i8 1>
+
+define void @test_neon_vector_add_2xi8() nounwind {
+; CHECK: test_neon_vector_add_2xi8:
+  %1 = load <2 x i8>* @i8_src1
+  %2 = load <2 x i8>* @i8_src2
+  %3 = add <2 x i8> %1, %2
+  store <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
+
+define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
+; CHECK: test_neon_ld_st_volatile_with_ashr_2xi8:
+  %1 = load volatile <2 x i8>* @i8_src1
+  %2 = load volatile <2 x i8>* @i8_src2
+  %3 = ashr <2 x i8> %1, %2
+  store volatile <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
new file mode 100644
index 0000000..2ab6a4f
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@src1_v2i16 = global <2 x i16> <i16 0, i16 1>
+@res_v2i16 = global <2 x i16> <i16 0, i16 0>
+
+declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
+
+define void @test_neon_call_return_v2i16() {
+; CHECK: test_neon_call_return_v2i16:
+  %1 = load <2 x i16>* @src1_v2i16
+  %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
+  store <2 x i16> %2, <2 x i16>* @res_v2i16
+  ret void
+}
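A note on the check prefixes used by 2011-10-26-memset-inline.ll above: FileCheck enforces only the directives whose prefix is named by a -check-prefix option, and directives under any other prefix are silently ignored rather than diagnosed, so a misspelled prefix disables its checks without failing the test (the file originally spelled four directives CHECK-GENERIT-NEXT, which masked them). A minimal sketch of the mechanism:

    ; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
    ; CHECK-GENERIC: strb         @ enforced only by the RUN line naming this prefix
    ; CHECK-GENERIC-NEXT: strb    @ and required to match the very next output line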
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
new file mode 100644
index 0000000..719571b
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @test1(<2 x double>* %A) {
+; CHECK: test1
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+  %tmp1 = load <2 x double>* %A
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @test2(<2 x double>* %A) {
+; CHECK: test2
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
+  %tmp1 = load <2 x double>* %A
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x double> @test3(<2 x i32>* %A) {
+; CHECK: test3
+; CHECK: vcvt.f64.s32
+; CHECK: vcvt.f64.s32
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
+  ret <2 x double> %tmp2
+}
+
+define <2 x double> @test4(<2 x i32>* %A) {
+; CHECK: test4
+; CHECK: vcvt.f64.u32
+; CHECK: vcvt.f64.u32
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
+  ret <2 x double> %tmp2
+}
diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
new file mode 100644
index 0000000..52aa0bf
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-regalloc
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This test calls shrinkToUses with an early-clobber redefined live range during
+; spilling.
+;
+;   Shrink: %vreg47,1.158257e-02 = [384r,400e:0)[400e,420r:1)  0@384r 1@400e
+;
+; The early-clobber instruction is an str:
+;
+;   %vreg12<earlyclobber,def> = t2STR_PRE %vreg6, %vreg12, 32, pred:14, pred:%noreg
+;
+; This tests that shrinkToUses handles the EC redef correctly.
+
+%struct.Transform_Struct.0.11.12.17.43.46.56.58.60 = type { [4 x [4 x double]] }
+
+define void @Compute_Axis_Rotation_Transform(%struct.Transform_Struct.0.11.12.17.43.46.56.58.60* nocapture %transform, double* nocapture %V1, double %angle) nounwind {
+entry:
+  store double 1.000000e+00, double* null, align 4
+  %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
+  store double 0.000000e+00, double* %arrayidx5.1.i, align 4
+  %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
+  store double 0.000000e+00, double* %arrayidx5.2.i, align 4
+  %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
+  store double 0.000000e+00, double* %arrayidx5.114.i, align 4
+  %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
+  store double 1.000000e+00, double* %arrayidx5.1.1.i, align 4
+  store double 0.000000e+00, double* null, align 4
+  store double 1.000000e+00, double* null, align 4
+  store double 0.000000e+00, double* null, align 4
+  %call = tail call double @cos(double %angle) nounwind readnone
+  %call1 = tail call double @sin(double %angle) nounwind readnone
+  %0 = load double* %V1, align 4
+  %arrayidx2 = getelementptr inbounds double* %V1, i32 1
+  %1 = load double* %arrayidx2, align 4
+  %mul = fmul double %0, %1
+  %sub = fsub double 1.000000e+00, %call
+  %mul3 = fmul double %mul, %sub
+  %2 = load double* undef, align 4
+  %mul5 = fmul double %2, %call1
+  %add = fadd double %mul3, %mul5
+  store double %add, double* %arrayidx5.1.i, align 4
+  %3 = load double* %V1, align 4
+  %mul11 = fmul double %3, undef
+  %mul13 = fmul double %mul11, %sub
+  %4 = load double* %arrayidx2, align 4
+  %mul15 = fmul double %4, %call1
+  %sub16 = fsub double %mul13, %mul15
+  store double %sub16, double* %arrayidx5.2.i, align 4
+  %5 = load double* %V1, align 4
+  %6 = load double* %arrayidx2, align 4
+  %mul22 = fmul double %5, %6
+  %mul24 = fmul double %mul22, %sub
+  %sub27 = fsub double %mul24, undef
+  store double %sub27, double* %arrayidx5.114.i, align 4
+  ret void
+}
+
+declare double @cos(double) nounwind readnone
+
+declare double @sin(double) nounwind readnone
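The t2STR_PRE named in the comment above is Thumb-2's pre-indexed store with writeback: the base register is both read and written by the same instruction, which is why it carries an earlyclobber def at the machine-instruction level. In ARM assembly the shape is (registers illustrative):

    str r1, [r0, #32]!   @ store r1 to [r0+32], then write r0+32 back into r0

Because the new base value is defined by the instruction that also reads the old one, the base register's live range is redefined at an early-clobber slot, which is exactly the case shrinkToUses has to handle.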
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
new file mode 100644
index 0000000..5409f8c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0.0 | FileCheck %s
+; rdar://10464621
+
+; DAG combine widens loads from packed types. The ARM load / store optimizer then
+; combines them into an ldm, which causes a runtime exception.
+
+%struct.InformationBlock = type <{ i32, %struct.FlagBits, %struct.FlagBits }>
+%struct.FlagBits = type <{ [4 x i32] }>
+
+@infoBlock = external global %struct.InformationBlock
+
+define hidden void @foo() {
+; CHECK: foo:
+; CHECK: ldr.w
+; CHECK: ldr.w
+; CHECK-NOT: ldm
+entry:
+  %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+  %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+  %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+  %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+  %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+  %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+  %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+  %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+  %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
+  %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
+  %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
+  %insert27 = insertvalue [4 x i32] %insert25, i32 %tmp19, 3
+  %insert = insertvalue [4 x i32] undef, i32 %tmp, 0
+  %insert7 = insertvalue [4 x i32] %insert, i32 %tmp3, 1
+  %insert9 = insertvalue [4 x i32] %insert7, i32 %tmp4, 2
+  %insert11 = insertvalue [4 x i32] %insert9, i32 %tmp5, 3
+  tail call void @bar([4 x i32] %insert27, [4 x i32] %insert11)
+  ret void
+}
+
+declare void @bar([4 x i32], [4 x i32])
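The likely failure mode behind the hunk above, stated as an assumption from the CHECK lines rather than from the original bug report: two adjacent 32-bit loads from an align-1 (packed) struct were merged into one ldm, but ARM LDM/STM always require a word-aligned base address, whereas plain ldr/str tolerate unaligned addresses on typical ARMv7 configurations. Schematically (registers illustrative):

    ldm   r0, {r1, r2}    @ faults when r0 is not 4-byte aligned
    ldr.w r1, [r0]        @ unaligned-capable on v7
    ldr.w r2, [r0, #4]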
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
new file mode 100644
index 0000000..6fbae19
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK: movw r1, :lower16:{{.*}}
+; CHECK: movt r1, :upper16:{{.*}}
+; CHECK: vldmia r1
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia [[reg0]], {{.*}}
+; CHECK: vmul.f32 {{.*}}
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
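The CHECK patterns in the file above encode the two legalization strategies for 128-bit vector math in play here: llvm.sqrt.v4f32 is scalarized onto hardware (four vsqrt.f32 instructions), while the transcendental intrinsics are scalarized into four libm calls (bl cosf, bl sinf, ...), marshalling each lane through r0. In IR terms, one lane of the libcall expansion looks roughly like (names illustrative):

    %lane = extractelement <4 x float> %v, i32 0
    %r = call float @sinf(float %lane)
    %res = insertelement <4 x float> undef, float %r, i32 0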
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
new file mode 100644
index 0000000..0c90f4c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+; <rdar://problem/10497732>
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i32 1
+@x2 = internal global i64 12
+
+define i64 @f() {
+  %ax = load i32* @x1
+  %a = zext i32 %ax to i64
+  %b = load i64* @x2
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; We can global-merge the i64 in theory, but the current code doesn't handle
+; the alignment correctly; for the moment, just check that we don't do it.
+; See also
+
+; CHECK-NOT: MergedGlobals
+; CHECK: _x2
+; CHECK-NOT: MergedGlobals
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
new file mode 100644
index 0000000..5ce600d
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; Radar 10266272
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios4.0.0"
+; STATS-NOT: machine-sink
+
+define i32 @foo(i32 %h) nounwind readonly ssp {
+entry:
+  br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+  %cmp = icmp slt i32 0, %h
+  br i1 %cmp, label %for.body, label %if.end299
+
+for.body: ; preds = %for.cond
+  %v.5 = select i1 undef, i32 undef, i32 0
+  %0 = load i8* undef, align 1, !tbaa !0
+  %conv88 = zext i8 %0 to i32
+  %sub89 = sub nsw i32 0, %conv88
+  %v.8 = select i1 undef, i32 undef, i32 %sub89
+  %1 = load i8* null, align 1, !tbaa !0
+  %conv108 = zext i8 %1 to i32
+  %2 = load i8* undef, align 1, !tbaa !0
+  %conv110 = zext i8 %2 to i32
+  %sub111 = sub nsw i32 %conv108, %conv110
+  %cmp112 = icmp slt i32 %sub111, 0
+  %sub115 = sub nsw i32 0, %sub111
+  %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
+  %add62 = add i32 0, %v.5
+  %add73 = add i32 %add62, 0
+  %add84 = add i32 %add73, 0
+  %add95 = add i32 %add84, %v.8
+  %add106 = add i32 %add95, 0
+  %add117 = add i32 %add106, %v.10
+  %add128 = add i32 %add117, 0
+  %add139 = add i32 %add128, 0
+  %add150 = add i32 %add139, 0
+  %add161 = add i32 %add150, 0
+  %add172 = add i32 %add161, 0
+  br i1 undef, label %for.cond, label %if.end299
+
+if.end299: ; preds = %for.body, %for.cond
+  %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+  ret i32 %s.10
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH. In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i8*, align 4
+  %tmp3 = alloca i1
+  %myException = alloca %0*, align 4
+  %tmp4 = alloca i8*
+  %tmp5 = alloca i32
+  %exception = alloca %0*, align 4
+  store i32 %a, i32* %tmp, align 4
+  store i32 %b, i32* %tmp1, align 4
+  store i8* %d, i8** %tmp2, align 4
+  store i1 false, i1* %tmp3
+  %tmp7 = load i8** %c
+  %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+          to label %bb11 unwind label %bb15
+
+bb11: ; preds = %bb
+  store %0* %tmp10, %0** %myException, align 4
+  %tmp12 = load %0** %myException, align 4
+  %tmp13 = bitcast %0* %tmp12 to i8*
+  invoke void @objc_exception_throw(i8* %tmp13) noreturn
+          to label %bb14 unwind label %bb15
+
+bb14: ; preds = %bb11
+  unreachable
+
+bb15: ; preds = %bb11, %bb
+  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+  store i8* %tmp17, i8** %tmp4
+  %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+  store i32 %tmp18, i32* %tmp5
+  store i1 true, i1* %tmp3
+  br label %bb56
+
+bb56:
+  unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+  unreachable
+
+bb2: ; preds = %bb
+  br label %bb3
+
+bb3: ; preds = %bb4, %bb2
+  %tmp = icmp slt i32 undef, undef
+  br i1 %tmp, label %bb4, label %bb67
+
+bb4: ; preds = %bb3
+  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+  %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp10 = fmul <4 x float> undef, %tmp9
+  %tmp11 = fadd <4 x float> undef, %tmp10
+  %tmp12 = bitcast <4 x float> zeroinitializer to i128
+  %tmp13 = lshr i128 %tmp12, 64
+  %tmp14 = trunc i128 %tmp13 to i64
+  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+  %tmp18 = fmul <4 x float> %tmp17, %tmp16
+  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+  %tmp20 = fmul <4 x float> %tmp19, %tmp18
+  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+  call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+  %tmp23 = bitcast <4 x float> %tmp22 to i128
+  %tmp24 = trunc i128 %tmp23 to i64
+  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+  %tmp35 = fmul <4 x float> %tmp34, undef
+  %tmp36 = fmul <4 x float> %tmp35, undef
+  %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+  %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp44 = fmul <4 x float> %tmp33, %tmp43
+  %tmp45 = fadd <4 x float> %tmp42, %tmp44
+  %tmp46 = fsub <4 x float> %tmp45, undef
+  %tmp47 = fmul <4 x float> %tmp46, %tmp36
+  %tmp48 = fadd <4 x float> undef, %tmp47
+  %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+  %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+  %tmp58 = fmul <4 x float> undef, %tmp57
+  %tmp59 = fsub <4 x float> %tmp51, %tmp48
+  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+  %tmp61 = fmul <4 x float> %tmp59, %tmp60
+  %tmp62 = fadd <4 x float> %tmp48, %tmp61
+  call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+  %tmp63 = bitcast <4 x float> %tmp62 to i128
+  %tmp64 = lshr i128 %tmp63, 64
+  %tmp65 = trunc i128 %tmp64 to i64
+  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+  call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+  br label %bb3
+
+bb67: ; preds = %bb3
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
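The vrecpe/vrecps pair in the function above is NEON's Newton-Raphson reciprocal idiom: vrecpe gives a low-precision estimate of 1/d, and vrecps(d, x) computes 2 - d*x, so multiplying x by it performs one refinement step. Two steps, as in the test, is the conventional sequence. Schematically:

    x0 = vrecpe(d)            ; rough estimate of 1/d
    x1 = x0 * vrecps(d, x0)   ; x1 = x0 * (2 - d*x0)
    x2 = x1 * vrecps(d, x1)   ; second refinement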
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp2 = extractelement <2 x float> %tmp, i32 0
+  %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+  %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+  %tmp7 = extractelement <2 x float> %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+  %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+  %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+  %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+  %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+  %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+  %tmp18 = extractelement <2 x float> %tmp17, i32 0
+  tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+  %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+  %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+  %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+  %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+  %tmp26 = extractelement <2 x float> %tmp25, i32 0
+  tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+  ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+  br i1 undef, label %for.end, label %cond.end295
+
+cond.end295: ; preds = %entry
+  %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+  %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+  %1 = bitcast <4 x float> undef to <2 x i64>
+  %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+  unreachable
+
+for.end: ; preds = %entry
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+  br label %1
+
+; <label>:1 ; preds = %1, %0
+  %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+  %3 = bitcast <4 x float> %2 to <2 x i64>
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+  %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+  %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+  %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+  %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+  %9 = bitcast <2 x float> %7 to <1 x i64>
+  %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+  %11 = bitcast <2 x i64> %10 to <4 x float>
+  br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+ +define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br i1 undef, label %bb92, label %bb3 + +bb3: ; preds = %bb2 + %tmp = or <4 x i32> undef, undef + %tmp4 = bitcast <4 x i32> %tmp to <4 x float> + %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4 + %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float> + %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64> + %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float> + %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1> + %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float> + %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2> + %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2> + %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64> + %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64> + %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32> + %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0> + %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32> + %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1> + %tmp22 = or <2 x i32> %tmp19, %tmp21 + %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64> + %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float> + %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1> + %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32> + %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0> + %tmp29 = and <2 x i32> undef, <i32 0, i32 -1> + %tmp30 = or <2 x i32> %tmp28, %tmp29 + %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64> + %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3 + %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp34 = fadd <4 x float> %tmp33, %tmp32 + %tmp35 = fmul <4 x float> %tmp33, zeroinitializer + %tmp36 = fadd <4 x float> %tmp35, zeroinitializer + %tmp37 = fadd <4 x float> %tmp35, zeroinitializer + %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64> + %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> + %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp42 = load <4 x float>* null, align 16, !tbaa !0 + %tmp43 = fmul <4 x float> %tmp42, %tmp41 + %tmp44 = load <4 x float>* undef, align 16, !tbaa !0 + %tmp45 = fadd <4 x float> undef, %tmp43 + %tmp46 = fadd <4 x float> undef, %tmp45 + %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> + %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float> + %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp51 = fmul <4 x float> %tmp42, %tmp50 + %tmp52 = fmul <4 x float> %tmp44, undef + %tmp53 = fadd <4 x float> %tmp52, %tmp51 + %tmp54 = fadd <4 x float> undef, %tmp53 + %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64> + %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1> + %tmp57 = bitcast 
<1 x i64> %tmp56 to <2 x float> + %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer + %tmp59 = fmul <4 x float> undef, %tmp58 + %tmp60 = fadd <4 x float> %tmp59, undef + %tmp61 = fadd <4 x float> %tmp60, zeroinitializer + %tmp62 = load void (i8*, i8*)** undef, align 4 + call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind + %tmp63 = bitcast <4 x float> %tmp46 to i128 + %tmp64 = bitcast <4 x float> %tmp54 to i128 + %tmp65 = bitcast <4 x float> %tmp61 to i128 + %tmp66 = lshr i128 %tmp63, 64 + %tmp67 = trunc i128 %tmp66 to i64 + %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1 + %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2 + %tmp70 = lshr i128 %tmp64, 64 + %tmp71 = trunc i128 %tmp70 to i64 + %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3 + %tmp73 = trunc i128 %tmp65 to i64 + %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4 + %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5 + %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6 + %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7 + call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind + %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind + %tmp79 = bitcast i8* %tmp78 to i512* + %tmp80 = load i512* %tmp79, align 16 + %tmp81 = lshr i512 %tmp80, 128 + %tmp82 = trunc i512 %tmp80 to i128 + %tmp83 = trunc i512 %tmp81 to i128 + %tmp84 = bitcast i128 %tmp83 to <4 x float> + %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64> + %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1> + %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float> + %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp89 = fmul <4 x float> undef, %tmp88 + %tmp90 = fadd <4 x float> %tmp89, undef + %tmp91 = fadd <4 x float> undef, %tmp90 + store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0 + unreachable + +bb92: ; preds = %bb2 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll new file mode 100644 index 0000000..6c7aaad --- /dev/null +++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-coalescing < %s +; PR11868 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { <4 x float> } +%1 = type { <4 x float> } + +@foo = external global %0, align 16 + +define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind { + %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16 + %5 = extractelement <4 x float> %4, i32 0 + %6 = extractelement <4 x float> %4, i32 1 + %7 = extractelement <4 x float> %4, i32 2 + %8 = insertelement <4 x float> undef, float %5, i32 0 + %9 = insertelement <4 x float> %8, float %6, i32 1 + %10 = insertelement <4 x float> %9, float %7, i32 2 + %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3 + store <4 x float> %11, <4 x float>* undef, align 16 + call arm_aapcs_vfpcc void @baz(%1* undef, float 0.000000e+00) nounwind + ret void +} + +declare arm_aapcs_vfpcc void @baz(%1*, float) diff --git a/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll new file 
mode 100644
index 0000000..c9ea691
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 -verify-machineinstrs < %s
+; PR12165
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "arm-none-linux"
+
+define hidden void @_strtod_r() nounwind {
+  br i1 undef, label %1, label %2
+
+; <label>:1 ; preds = %0
+  br label %2
+
+; <label>:2 ; preds = %1, %0
+  br i1 undef, label %3, label %8
+
+; <label>:3 ; preds = %2
+  br i1 undef, label %4, label %7
+
+; <label>:4 ; preds = %3
+  %5 = call i32 @llvm.flt.rounds()
+  %6 = icmp eq i32 %5, 1
+  br i1 %6, label %8, label %7
+
+; <label>:7 ; preds = %4, %3
+  unreachable
+
+; <label>:8 ; preds = %4, %2
+  br i1 undef, label %9, label %10
+
+; <label>:9 ; preds = %8
+  br label %10
+
+; <label>:10 ; preds = %9, %8
+  ret void
+}
+
+declare i32 @llvm.flt.rounds() nounwind
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
new file mode 100644
index 0000000..6206cd7
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead, i.e.
+; (i32 extload $addr+c*sizeof(i16)).
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: vst1.32
+  %0 = load <3 x i16> * %srcA, align 8
+  %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+  store <2 x i16> %1, <2 x i16> * %dst, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 0000000..0ff4f51
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to
+; fold the immediate by breaking it into two parts and folding them into the
+; immediate fields of two uses, e.g.
+; movw r2, #40885
+; movt r3, #46540
+; add r0, r0, r3
+; =>
+; add.w r0, r0, #3019898880
+; add.w r0, r0, #30146560
+;
+; However, this transformation is incorrect if the user produces a flag, e.g.
+; movw r2, #40885
+; movt r3, #46540
+; adds r0, r0, r3
+; =>
+; add.w r0, r0, #3019898880
+; adds.w r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
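+;
+; (Checking the arithmetic of the split, for reference: the folded constant
+; is 46540 << 16 = 0xB5CC0000, and 0xB5CC0000 = 0xB4000000 + 0x01CC0000,
+; i.e. 3050045440 = 3019898880 + 30146560, each half being an encodable
+; Thumb2 modified immediate. The split is only safe when the flag result of
+; the user is dead.)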
+; +; rdar://11116189 +define i64 @t(i64 %aInput) nounwind { +; CHECK: t: +; CHECK: movs [[REG:(r[0-9]+)]], #0 +; CHECK: movt [[REG]], #46540 +; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]] + %1 = mul i64 %aInput, 1000000 + %2 = add i64 %1, -7952618389194932224 + ret i64 %2 +} diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll new file mode 100644 index 0000000..33ad187 --- /dev/null +++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s +; PR11861 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo() nounwind align 2 { + br i1 undef, label %5, label %1 + +; <label>:1 ; preds = %0 + %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %2 to <4 x float> + store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0 + store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0 + store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0 + %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2 + store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0 + unreachable + +; <label>:5 ; preds = %0 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll new file mode 100644 index 0000000..6f50f27 --- /dev/null +++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math +;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +;target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 { +bb4: + %tmp = extractelement <2 x float> undef, i32 0 + br i1 undef, label %bb18, label %bb5 + +bb5: ; preds = %bb4 + %tmp6 = fadd float %tmp, -1.500000e+01 + %tmp7 = fdiv float %tmp6, 2.000000e+01 + %tmp8 = fadd float %tmp7, 1.000000e+00 + %tmp9 = fdiv float 1.000000e+00, %tmp8 + %tmp10 = fsub float 1.000000e+00, %tmp9 + %tmp11 = fmul float %tmp10, 1.000000e+01 + %tmp12 = fadd float %tmp11, 1.500000e+01 + %tmp13 = fdiv float %tmp12, %tmp + %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0 + %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer + %tmp16 = fmul <4 x float> zeroinitializer, %tmp15 + %tmp17 = fadd <4 x float> %tmp16, %arg + store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0 + br label %bb18 + +bb18: ; preds = %bb5, %bb4 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll index 95edaad..1272e8e 100644 --- a/test/CodeGen/ARM/arm-returnaddr.ll +++ b/test/CodeGen/ARM/arm-returnaddr.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s +; RUN: llc < %s 
-mtriple=thumbv6-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s ; rdar://8015977 ; rdar://8020118 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 02ce5a1..8967730 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -61,7 +61,7 @@ entry: ; CHECK: strex %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old - %neg = sub i32 0, 1 ; <i32> [#uses=1] + %neg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -77,5 +77,85 @@ entry: ; CHECK: strex %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old - ret void + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %11 = atomicrmw umin i32* %val2, i32 16 monotonic + store i32 %11, i32* %old + %uneg = sub i32 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic + store i32 %12, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %13 = atomicrmw umax i32* %val2, i32 1 monotonic + store i32 %13, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %14 = atomicrmw umax i32* %val2, i32 0 monotonic + store i32 %14, i32* %old + + ret void +} + +define void @func2() nounwind { +entry: + %val = alloca i16 + %old = alloca i16 + store i16 31, i16* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i16* %val, i16 16 monotonic + store i16 %0, i16* %old + %uneg = sub i16 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %1 = atomicrmw umin i16* %val, i16 %uneg monotonic + store i16 %1, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i16* %val, i16 1 monotonic + store i16 %2, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i16* %val, i16 0 monotonic + store i16 %3, i16* %old + ret void +} + +define void @func3() nounwind { +entry: + %val = alloca i8 + %old = alloca i8 + store i8 31, i8* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i8* %val, i8 16 monotonic + store i8 %0, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %uneg = sub i8 0, 1 + %1 = atomicrmw umin i8* %val, i8 %uneg monotonic + store i8 %1, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i8* %val, i8 1 monotonic + store i8 %2, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i8* %val, i8 0 monotonic + store i8 %3, i8* %old + ret void } diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 92aff70..1b385ab 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -3,14 +3,48 @@ ; dependency) when it isn't dependent on last CPSR defining instruction. 
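; (Mechanics, briefly: the 16-bit "muls" updates only the N and Z flags and
; leaves C and V intact, so the core must merge the result into the previous
; CPSR value, a read-modify-write that serializes against the last flag
; writer; the 32-bit "mul" leaves CPSR alone and avoids the false dependency.)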
; rdar://8928208 -define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { entry: -; CHECK: t: -; CHECK: muls [[REG:(r[0-9]+)]], r2, r3 -; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1 -; CHECK-NEXT: muls r0, [[REG2]], [[REG]] +; CHECK: t1: +; CHECK: muls [[REG:(r[0-9]+)]], r3, r2 +; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0 +; CHECK-NEXT: muls r0, [[REG]], [[REG2]] %0 = mul nsw i32 %a, %b %1 = mul nsw i32 %c, %d %2 = mul nsw i32 %0, %1 ret i32 %2 } + +; Avoid partial CPSR dependency via loop backedge. +; rdar://10357570 +define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind { +entry: +; CHECK: t2: + %tobool7 = icmp eq i32* %ptr2, null + br i1 %tobool7, label %while.end, label %while.body + +while.body: +; CHECK: while.body +; CHECK: mul r{{[0-9]+}} +; CHECK-NOT: muls + %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ] + %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ] + %0 = load i32* %ptr1.addr.09, align 4 + %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1 + %1 = load i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2 + %2 = load i32* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3 + %3 = load i32* %arrayidx4, align 4 + %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4 + %mul = mul i32 %1, %0 + %mul5 = mul i32 %mul, %2 + %mul6 = mul i32 %mul5, %3 + store i32 %mul6, i32* %ptr2.addr.08, align 4 + %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1 + %tobool = icmp eq i32* %incdec.ptr, null + br i1 %tobool, label %while.end, label %while.body + +while.end: + ret void +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f78d998..94edff5 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 +; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D ; Enable tailcall optimization for iOS 5.0 @@ -96,3 +96,70 @@ bb: tail call void @foo() nounwind ret void } + +; Make sure codegenprep is duplicating ret instructions to enable tail calls. 
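+; (Sketch of the expected transformation on @t8 below: as written, every call
+; funnels through the shared "return:" block via a phi, so none of them is in
+; tail position; CodeGenPrepare is expected to duplicate the ret into each
+; predecessor so the bne.w/b.w checks can match direct tail calls.)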
+; rdar://11140249
+define i32 @t8(i32 %x) nounwind ssp {
+entry:
+; CHECKT2D: t8:
+; CHECKT2D-NOT: push
+  %and = and i32 %x, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+; CHECKT2D: bne.w _a
+  %call = tail call i32 @a(i32 %x) nounwind
+  br label %return
+
+if.end: ; preds = %entry
+  %and1 = and i32 %x, 2
+  %tobool2 = icmp eq i32 %and1, 0
+  br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3: ; preds = %if.end
+; CHECKT2D: bne.w _b
+  %call4 = tail call i32 @b(i32 %x) nounwind
+  br label %return
+
+if.end5: ; preds = %if.end
+; CHECKT2D: b.w _c
+  %call6 = tail call i32 @c(i32 %x) nounwind
+  br label %return
+
+return: ; preds = %if.end5, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ]
+  ret i32 %retval.0
+}
+
+declare i32 @a(i32)
+
+declare i32 @b(i32)
+
+declare i32 @c(i32)
+
+; PR12419
+; rdar://11195178
+; Use the correct input chain for the tailcall node or else the call to
+; _ZN9MutexLockD1Ev would be lost.
+%class.MutexLock = type { i8 }
+
+@x = external global i32, align 4
+
+define i32 @t9() nounwind {
+; CHECKT2D: t9:
+; CHECKT2D: blx __ZN9MutexLockC1Ev
+; CHECKT2D: blx __ZN9MutexLockD1Ev
+; CHECKT2D: b.w ___divsi3
+  %lock = alloca %class.MutexLock, align 1
+  %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
+  %2 = load i32* @x, align 4
+  %3 = sdiv i32 1000, %2
+  %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
+  ret i32 %3
+}
+
+declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 0f9543f..107e79a 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -26,7 +26,7 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
 ; CHECKV4: bx r{{.*}}
 BB0:
   %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
-  %t35 = volatile load i32* %5 ; <i32> [#uses=1]
+  %t35 = load volatile i32* %5 ; <i32> [#uses=1]
   %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
   %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
   %8 = load i32** %7 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index e381e00..5b6a584 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 define i32 @test(i32 %x) {
 ; CHECK: test
 ; CHECK: clz r0, r0
-  %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+  %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
   ret i32 %tmp.1
 }
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 0000000..7316452
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7-apple-ios
-disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s + +; LLVM IR optimizers canonicalize icmp+select this way. +; Make sure that TwoAddressInstructionPass can commute the corresponding +; MOVCC instructions to avoid excessive copies in one of the if blocks. +; +; CHECK: %if.then +; CHECK-NOT: mov +; CHECK: movlo +; CHECK: movlo +; CHECK-NOT: mov + +; CHECK: %if.else +; CHECK-NOT: mov +; CHECK: movls +; CHECK: movls +; CHECK-NOT: mov + +; This is really an LSR test: Make sure that cmp is using the incremented +; induction variable. +; CHECK: %if.end8 +; CHECK: add{{(s|\.w)?}} [[IV:r[0-9]+]], {{.*}}#1 +; CHECK: cmp [[IV]], # + +define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %entry, %if.end8 + %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ] + %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ] + %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.012 + %0 = load i32* %arrayidx, align 4, !tbaa !0 + %mul = mul i32 %0, %0 + %sub = add nsw i32 %i.012, -5 + %cmp2 = icmp eq i32 %sub, %Pref + br i1 %cmp2, label %if.else, label %if.then + +if.then: ; preds = %for.body + %cmp3 = icmp ult i32 %mul, %BestCost.011 + %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010 + %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011 + br label %if.end8 + +if.else: ; preds = %for.body + %cmp5 = icmp ugt i32 %mul, %BestCost.011 + %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012 + %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul + br label %if.end8 + +if.end8: ; preds = %if.else, %if.then + %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ] + %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ] + store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 + %inc = add i32 %i.012, 1 + %cmp = icmp eq i32 %inc, 11 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %if.end8 + ret i32 %BestIdx.1 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll new file mode 100644 index 0000000..eff5de5 --- /dev/null +++ b/test/CodeGen/ARM/cse-call.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "armv6-apple-ios0.0.0" + +; Don't CSE a cmp across a call that clobbers CPSR. +; +; CHECK: cmp +; CHECK: S_trimzeros +; CHECK: cmp +; CHECK: strlen + +@F_floatmul.man1 = external global [200 x i8], align 1 +@F_floatmul.man2 = external global [200 x i8], align 1 + +declare i32 @strlen(i8* nocapture) nounwind readonly +declare void @S_trimzeros(...) + +define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp { +entry: + br i1 undef, label %while.end42, label %while.body37 + +while.body37: ; preds = %while.body37, %entry + br i1 false, label %while.end42, label %while.body37 + +while.end42: ; preds = %while.body37, %entry + %. 
= select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0) + %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0) + tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind + %call47 = tail call i32 @strlen(i8* %.) nounwind + unreachable +} diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll index 0dcf9dd..1d011be 100644 --- a/test/CodeGen/ARM/cse-libcalls.ll +++ b/test/CodeGen/ARM/cse-libcalls.ll @@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8" ; Without CSE of libcalls, there are two calls in the output instead of one. -define i32 @u_f_nonbon(double %lambda) nounwind { +define double @u_f_nonbon(double %lambda) nounwind { entry: %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2] %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1] @@ -26,5 +26,5 @@ bb502.loopexit.i: ; preds = %bb28.i br i1 false, label %bb.nph53.i, label %bb508.i bb508.i: ; preds = %bb502.loopexit.i, %entry - ret i32 1 + ret double %tmp10.i4 } diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll new file mode 100644 index 0000000..6419292 --- /dev/null +++ b/test/CodeGen/ARM/ctor_order.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI + +; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs +; DARWIN: .long _f151 +; DARWIN-NEXT: .long _f152 + +; ELF: .section .ctors.65384,"aw",%progbits +; ELF: .long f151 +; ELF: .section .ctors.65383,"aw",%progbits +; ELF: .long f152 + +; GNUEABI: .section .init_array.151,"aw",%init_array +; GNUEABI: .long f151 +; GNUEABI: .section .init_array.152,"aw",%init_array +; GNUEABI: .long f152 + + +@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ] + +define void @f151() { +entry: + ret void +} + +define void @f152() { +entry: + ret void +} diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll index 1d2ced3..5ebca53 100644 --- a/test/CodeGen/ARM/ctz.ll +++ b/test/CodeGen/ARM/ctz.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s -declare i32 @llvm.cttz.i32(i32) +declare i32 @llvm.cttz.i32(i32, i1) define i32 @f1(i32 %a) { ; CHECK: f1: ; CHECK: rbit ; CHECK: clz - %tmp = call i32 @llvm.cttz.i32( i32 %a ) + %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true ) ret i32 %tmp } diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll new file mode 100644 index 0000000..18f57ea --- /dev/null +++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple armv7 %s -o - | FileCheck %s + +; CHECK: f: +define float @f(<4 x i16>* nocapture %in) { + ; CHECK: vldr + ; CHECK: vmovl.u16 + ; CHECK-NOT: vand + %1 = load <4 x i16>* %in + ; CHECK: vcvt.f32.u32 + %2 = uitofp <4 x i16> %1 to <4 x float> + %3 = extractelement <4 x float> %2, i32 0 + %4 = extractelement <4 x float> %2, i32 1 + %5 = extractelement <4 x float> %2, i32 2 + + ; CHECK: vadd.f32 + %6 = fadd float %3, %4 + %7 = fadd float %6, %5 + + ret float %7 +} + +define float @g(<4 x i16>* nocapture %in) { + ; 
CHECK: vldr + %1 = load <4 x i16>* %in + ; CHECK-NOT: uxth + %2 = extractelement <4 x i16> %1, i32 0 + ; CHECK: vcvt.f32.u32 + %3 = uitofp i16 %2 to float + ret float %3 +} diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index b0270f9..a7b44e6 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -2,7 +2,7 @@ ; Test to check argument y's debug info uses FI ; Radar 10048772 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.tag_s = type { i32, i32, i32 } diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 2c59316..0ad0a15 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -1,8 +1,8 @@ ; RUN: llc -O0 < %s | FileCheck %s -; CHECK: @DEBUG_VALUE: mydata <- [sp+#4]+#0 +; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0 ; Radar 9331779 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %0 = type opaque %1 = type { [4 x i32] } diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll index 8c9095e..325eea0 100644 --- a/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s - | FileCheck %s +; RUN: llc < %s | FileCheck %s ; Radar 9309221 ; Test dwarf reg no for d16 ;CHECK: DW_OP_regx diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index e83a83d..97c9c66 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -3,13 +3,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: DW_OP_regx for Q register: D1 -;CHECK-NEXT: byte -;CHECK-NEXT: byte +;CHECK-NEXT: ascii ;CHECK-NEXT: DW_OP_piece 8 ;CHECK-NEXT: byte 8 ;CHECK-NEXT: DW_OP_regx for Q register: D2 -;CHECK-NEXT: byte -;CHECK-NEXT: byte +;CHECK-NEXT: ascii ;CHECK-NEXT: DW_OP_piece 8 ;CHECK-NEXT: byte 8 diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index 548c9bd..db41143 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -2,8 +2,7 @@ ; Radar 9309221 ; Test dwarf reg no for s16 ;CHECK: DW_OP_regx for S register -;CHECK-NEXT: byte -;CHECK-NEXT: byte +;CHECK-NEXT: ascii ;CHECK-NEXT: DW_OP_bit_piece 32 0 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index ee777ce..ae7af0a 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: Ldebug_loc0: +;CHECK-NEXT: .long Ltmp0 ;CHECK-NEXT: .long Ltmp1 -;CHECK-NEXT: .long Ltmp2 -;CHECK-NEXT: Lset8 = Ltmp10-Ltmp9 @ Loc expr size -;CHECK-NEXT: .short Lset8 -;CHECK-NEXT: Ltmp9: +;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size +;CHECK-NEXT: .short Lset[[N]] +;CHECK-NEXT: Ltmp[[M]]: 
;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register define void @_Z3foov() optsize ssp { diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp deleted file mode 100644 index 3ff359a..0000000 --- a/test/CodeGen/ARM/dg.exp +++ /dev/null @@ -1,5 +0,0 @@ -load_lib llvm.exp - -if { [llvm_supports_target ARM] } { - RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] -} diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll index e475508..d1252f4 100644 --- a/test/CodeGen/ARM/eh-resume-darwin.ll +++ b/test/CodeGen/ARM/eh-resume-darwin.ll @@ -3,12 +3,6 @@ target triple = "armv6-apple-macosx10.6" declare void @func() -declare i8* @llvm.eh.exception() nounwind readonly - -declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind - -declare void @llvm.eh.resume(i8*, i32) - declare i32 @__gxx_personality_sj0(...) define void @test0() { @@ -20,10 +14,9 @@ cont: ret void lpad: - %exn = call i8* @llvm.eh.exception() - %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) - call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn - unreachable + %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + resume { i8*, i32 } %exn } ; CHECK: __Unwind_SjLj_Resume diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll new file mode 100644 index 0000000..fd7d0e6 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-unwind.ll @@ -0,0 +1,16 @@ +; Test that the EHABI unwind instruction generator does not encounter any +; unfamiliar instructions. +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors + +define void @_Z1fv() nounwind { +entry: + ret void +} + +define void @_Z1gv() nounwind { +entry: + call void @_Z1fv() + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll new file mode 100644 index 0000000..dbb634d --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } +%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] } + +@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4 +@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4 +@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4 + +define i32* @t1() nounwind { +entry: +; ARM: t1 +; THUMB: t1 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #124 +; THUMB: adds r0, #124 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t2() nounwind { +entry: +; ARM: t2 +; THUMB: t2 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 +; ARM: movw r1, #1148 +; ARM: add r0, r0, r1 +; THUMB: addw r0, r0, #1148 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t3() nounwind { +entry: +; 
ARM: t3 +; THUMB: t3 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #140 +; THUMB: adds r0, #140 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t4() nounwind { +entry: +; ARM: t4 +; THUMB: t4 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4 +; ARM-NOT: movw r{{[0-9]}}, #1060 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM: movw r{{[0-9]}}, #1284 +; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284 + %0 = load i32** %addr, align 4 + ret i32* %0 +} diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll new file mode 100644 index 0000000..723383e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-binary.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Test add with non-legal types + +define void @add_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: add_i1 +; THUMB: add_i1 + %a.addr = alloca i1, align 4 + %0 = add i1 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @add_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: add_i8 +; THUMB: add_i8 + %a.addr = alloca i8, align 4 + %0 = add i8 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @add_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: add_i16 +; THUMB: add_i16 + %a.addr = alloca i16, align 4 + %0 = add i16 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test or with non-legal types + +define void @or_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: or_i1 +; THUMB: or_i1 + %a.addr = alloca i1, align 4 + %0 = or i1 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @or_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: or_i8 +; THUMB: or_i8 + %a.addr = alloca i8, align 4 + %0 = or i8 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @or_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: or_i16 +; THUMB: or_i16 + %a.addr = alloca i16, align 4 + %0 = or i16 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test sub with non-legal types + +define void @sub_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: sub_i1 +; THUMB: sub_i1 + %a.addr = alloca i1, align 4 + %0 = sub i1 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @sub_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: sub_i8 +; THUMB: sub_i8 + %a.addr = alloca i8, align 4 + %0 = sub i8 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @sub_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: sub_i16 +; 
THUMB: sub_i16 + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll new file mode 100644 index 0000000..7c532d5 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { +entry: +; THUMB: t1: +; ARM: t1: + %x = add i32 %a, %b + br i1 1, label %if.then, label %if.else +; THUMB-NOT: b LBB0_1 +; ARM-NOT: b LBB0_1 + +if.then: ; preds = %entry + call void @foo1() + br label %if.end7 + +if.else: ; preds = %entry + br i1 0, label %if.then2, label %if.else3 +; THUMB: b LBB0_4 +; ARM: b LBB0_4 + +if.then2: ; preds = %if.else + call void @foo2() + br label %if.end6 + +if.else3: ; preds = %if.else + %y = sub i32 %a, %b + br i1 1, label %if.then5, label %if.end +; THUMB-NOT: b LBB0_5 +; ARM-NOT: b LBB0_5 + +if.then5: ; preds = %if.else3 + call void @foo1() + br label %if.end + +if.end: ; preds = %if.then5, %if.else3 + br label %if.end6 + +if.end6: ; preds = %if.end, %if.then2 + br label %if.end7 + +if.end7: ; preds = %if.end6, %if.then + ret i32 0 +} + +declare void @foo1() + +declare void @foo2() diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll new file mode 100644 index 0000000..a0aba69 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-br-phi.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios + +; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic +; non-legal integer types (i.e., i1, i8, i16). 
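+; (For example, the i8 phi below,
+;   %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+; involves a type with no legal ARM register class, so the incoming constants
+; must be promoted, presumably to i32, when the predecessor copies are
+; emitted; the test only needs to compile without tripping -fast-isel-abort.)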
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+  br label %cond.end
+
+cond.false: ; preds = %entry
+  br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+  br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8: ; preds = %cond.end
+  br label %cond.end8
+
+cond.false8: ; preds = %cond.end
+  br label %cond.end8
+
+cond.end8: ; preds = %cond.false8, %cond.true8
+  %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+  call void @fooi8(i8 %cond8)
+  br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16: ; preds = %cond.end8
+  br label %cond.end16
+
+cond.false16: ; preds = %cond.end8
+  br label %cond.end16
+
+cond.end16: ; preds = %cond.false16, %cond.true16
+  %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+  call void @fooi16(i16 %cond16)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
new file mode 100644
index 0000000..dd460b2
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t0(i1 zeroext %a) nounwind {
+  %1 = zext i1 %a to i32
+  ret i32 %1
+}
+
+define i32 @t1(i8 signext %a) nounwind {
+  %1 = sext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t2(i8 zeroext %a) nounwind {
+  %1 = zext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t3(i16 signext %a) nounwind {
+  %1 = sext i16 %a to i32
+  ret i32 %1
+}
+
+define i32 @t4(i16 zeroext %a) nounwind {
+  %1 = zext i16 %a to i32
+  ret i32 %1
+}
+
+define void @foo(i8 %a, i16 %b) nounwind {
+; ARM: foo
+; THUMB: foo
+;; Materialize i1 1
+; ARM: movw r2, #1
+;; zero-ext
+; ARM: and r2, r2, #1
+; THUMB: and r2, r2, #1
+  %1 = call i32 @t0(i1 zeroext 1)
+; ARM: sxtb r2, r1
+; ARM: mov r0, r2
+; THUMB: sxtb r2, r1
+; THUMB: mov r0, r2
+  %2 = call i32 @t1(i8 signext %a)
+; ARM: uxtb r2, r1
+; ARM: mov r0, r2
+; THUMB: uxtb r2, r1
+; THUMB: mov r0, r2
+  %3 = call i32 @t2(i8 zeroext %a)
+; ARM: sxth r2, r1
+; ARM: mov r0, r2
+; THUMB: sxth r2, r1
+; THUMB: mov r0, r2
+  %4 = call i32 @t3(i16 signext %b)
+; ARM: uxth r2, r1
+; ARM: mov r0, r2
+; THUMB: uxth r2, r1
+; THUMB: mov r0, r2
+  %5 = call i32 @t4(i16 zeroext %b)
+
+;; A few tests to check materialization
+;; Note: i1 1 was materialized with t1 call
+; ARM: movw r1, #255
+%6 = call i32 @t2(i8 zeroext 255)
+; ARM: movw r1, #65535
+; THUMB: movw r1, #65535
+%7 = call i32 @t4(i16 zeroext 65535)
+  ret void
+}
+
+define void @foo2() nounwind {
+  %1 = call signext i16 @t5()
+  %2 = call zeroext i16 @t6()
+  %3 = call signext i8 @t7()
+  %4 = call zeroext i8 @t8()
+  %5 = call zeroext i1 @t9()
+  ret void
+}
+
+declare signext i16 @t5();
+declare zeroext i16 @t6();
+declare signext i8 @t7();
+declare zeroext i8 @t8();
+declare zeroext i1 @t9();
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ARM: @t10
+; ARM: movw r0, #0
+; ARM: movw r1, #248
+; ARM: movw r2, #187
+; ARM: movw r3, #28
+; ARM: movw r9, #40
+; ARM: movw r12, #186
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: uxtb r2, r2
+; ARM: uxtb r3, r3
+; ARM: uxtb r9, r9
+; ARM: str r9, [sp]
+; ARM: uxtb r9, r12
+; ARM: str r9, [sp, #4]
+; ARM: bl _bar
+; THUMB:
@t10 +; THUMB: movs r0, #0 +; THUMB: movt r0, #0 +; THUMB: movs r1, #248 +; THUMB: movt r1, #0 +; THUMB: movs r2, #187 +; THUMB: movt r2, #0 +; THUMB: movs r3, #28 +; THUMB: movt r3, #0 +; THUMB: movw r9, #40 +; THUMB: movt r9, #0 +; THUMB: movw r12, #186 +; THUMB: movt r12, #0 +; THUMB: uxtb r0, r0 +; THUMB: uxtb r1, r1 +; THUMB: uxtb r2, r2 +; THUMB: uxtb r3, r3 +; THUMB: uxtb.w r9, r9 +; THUMB: str.w r9, [sp] +; THUMB: uxtb.w r9, r12 +; THUMB: str.w r9, [sp, #4] +; THUMB: bl _bar + %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) + ret i32 0 +} + +declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext) diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll new file mode 100644 index 0000000..660156a --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -0,0 +1,250 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define void @t1a(float %a) uwtable ssp { +entry: +; ARM: t1a +; THUMB: t1a + %cmp = fcmp oeq float %a, 0.000000e+00 +; ARM: vcmpe.f32 s{{[0-9]+}}, #0 +; THUMB: vcmpe.f32 s{{[0-9]+}}, #0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @foo() + +; Shouldn't be able to encode -0.0 imm. +define void @t1b(float %a) uwtable ssp { +entry: +; ARM: t1b +; THUMB: t1b + %cmp = fcmp oeq float %a, -0.000000e+00 +; ARM: vldr +; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} +; THUMB: vldr +; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t2a(double %a) uwtable ssp { +entry: +; ARM: t2a +; THUMB: t2a + %cmp = fcmp oeq double %a, 0.000000e+00 +; ARM: vcmpe.f64 d{{[0-9]+}}, #0 +; THUMB: vcmpe.f64 d{{[0-9]+}}, #0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Shouldn't be able to encode -0.0 imm. 
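+; (The vcmpe immediate form can only encode +0.0, so a compare against -0.0
+; must first materialize the constant, hence the vldr and register-register
+; vcmpe expected below, mirroring the single-precision case in t1b above.)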
+define void @t2b(double %a) uwtable ssp { +entry: +; ARM: t2b +; THUMB: t2b + %cmp = fcmp oeq double %a, -0.000000e+00 +; ARM: vldr +; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} +; THUMB: vldr +; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t4(i8 signext %a) uwtable ssp { +entry: +; ARM: t4 +; THUMB: t4 + %cmp = icmp eq i8 %a, -1 +; ARM: cmn r{{[0-9]}}, #1 +; THUMB: cmn.w r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t5(i8 zeroext %a) uwtable ssp { +entry: +; ARM: t5 +; THUMB: t5 + %cmp = icmp eq i8 %a, 1 +; ARM: cmp r{{[0-9]}}, #1 +; THUMB: cmp r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t6(i16 signext %a) uwtable ssp { +entry: +; ARM: t6 +; THUMB: t6 + %cmp = icmp eq i16 %a, -1 +; ARM: cmn r{{[0-9]}}, #1 +; THUMB: cmn.w r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t7(i16 zeroext %a) uwtable ssp { +entry: +; ARM: t7 +; THUMB: t7 + %cmp = icmp eq i16 %a, 1 +; ARM: cmp r{{[0-9]}}, #1 +; THUMB: cmp r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t8(i32 %a) uwtable ssp { +entry: +; ARM: t8 +; THUMB: t8 + %cmp = icmp eq i32 %a, -1 +; ARM: cmn r{{[0-9]}}, #1 +; THUMB: cmn.w r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t9(i32 %a) uwtable ssp { +entry: +; ARM: t9 +; THUMB: t9 + %cmp = icmp eq i32 %a, 1 +; ARM: cmp r{{[0-9]}}, #1 +; THUMB: cmp r{{[0-9]}}, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t10(i32 %a) uwtable ssp { +entry: +; ARM: t10 +; THUMB: t10 + %cmp = icmp eq i32 %a, 384 +; ARM: cmp r{{[0-9]}}, #384 +; THUMB: cmp.w r{{[0-9]}}, #384 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t11(i32 %a) uwtable ssp { +entry: +; ARM: t11 +; THUMB: t11 + %cmp = icmp eq i32 %a, 4096 +; ARM: cmp r{{[0-9]}}, #4096 +; THUMB: cmp.w r{{[0-9]}}, #4096 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t12(i8 %a) uwtable ssp { +entry: +; ARM: t12 +; THUMB: t12 + %cmp = icmp ugt i8 %a, -113 +; ARM: cmp r{{[0-9]}}, #143 +; THUMB: cmp r{{[0-9]}}, #143 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; rdar://11038907 +; When comparing LONG_MIN/INT_MIN use a cmp instruction. 
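+; (Reasoning, briefly: an icmp against a constant is often lowered as cmn
+; with the negated constant, but negating INT_MIN overflows i32, so the
+; compare must stay a cmp; #-2147483648 (0x80000000) is itself an encodable
+; modified immediate, e.g. 0x02 rotated right by 2.)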
+define void @t13() nounwind ssp { +entry: +; ARM: t13 +; THUMB: t13 + %cmp = icmp slt i32 -123, -2147483648 +; ARM: cmp r{{[0-9]}}, #-2147483648 +; THUMB: cmp.w r{{[0-9]}}, #-2147483648 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + ret void +} + diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll new file mode 100644 index 0000000..686ccad --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-conversion.ll @@ -0,0 +1,242 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Test sitofp + +define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ARM: sitofp_single_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.s32 s0, s0 +; THUMB: sitofp_single_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.s32 s0, s0 + %b.addr = alloca float, align 4 + %conv = sitofp i32 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ARM: sitofp_single_i16 +; ARM: sxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.s32 s0, s0 +; THUMB: sitofp_single_i16 +; THUMB: sxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.s32 s0, s0 + %b.addr = alloca float, align 4 + %conv = sitofp i16 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ARM: sitofp_single_i8 +; ARM: sxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.s32 s0, s0 +; THUMB: sitofp_single_i8 +; THUMB: sxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.s32 s0, s0 + %b.addr = alloca float, align 4 + %conv = sitofp i8 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ARM: sitofp_double_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.s32 d16, s0 +; THUMB: sitofp_double_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.s32 d16, s0 + %b.addr = alloca double, align 8 + %conv = sitofp i32 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ARM: sitofp_double_i16 +; ARM: sxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.s32 d16, s0 +; THUMB: sitofp_double_i16 +; THUMB: sxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.s32 d16, s0 + %b.addr = alloca double, align 8 + %conv = sitofp i16 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ARM: sitofp_double_i8 +; ARM: sxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.s32 d16, s0 +; THUMB: sitofp_double_i8 +; THUMB: sxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.s32 d16, s0 + %b.addr = alloca double, align 8 + %conv = sitofp i8 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test uitofp + +define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ARM: uitofp_single_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i32 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { 
+entry: +; ARM: uitofp_single_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i16 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ARM: uitofp_single_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i8 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i32 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i16 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i8 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test fptosi + +define void @fptosi_float(float %a) nounwind ssp { +entry: +; ARM: fptosi_float +; ARM: vcvt.s32.f32 s0, s0 +; THUMB: fptosi_float +; THUMB: vcvt.s32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_double(double %a) nounwind ssp { +entry: +; ARM: fptosi_double +; ARM: vcvt.s32.f64 s0, d16 +; THUMB: fptosi_double +; THUMB: vcvt.s32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float(float %a) nounwind ssp { +entry: +; ARM: fptoui_float +; ARM: vcvt.u32.f32 s0, s0 +; THUMB: fptoui_float +; THUMB: vcvt.u32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_double(double %a) nounwind ssp { +entry: +; ARM: fptoui_double +; ARM: vcvt.u32.f64 s0, d16 +; THUMB: fptoui_double +; THUMB: vcvt.u32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll new file mode 100644 index 0000000..7e147c7 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Target-specific selector can't properly handle the double because it isn't +; being passed via a register, so the materialized arguments become dead code. 
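+; (In other words, a plausible reading of the failure mode: fast-isel
+; materializes the call arguments, then finds it cannot pass the double in
+; registers and punts the whole call to SelectionDAG, which re-emits its own
+; argument setup; the THUMB-NOT lines below verify that the orphaned
+; materialization code was deleted as dead.)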
+ +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: +; THUMB: main + call void @printArgsNoRet(i32 1, float 0x4000CCCCC0000000, i8 signext 99, double 4.100000e+00) +; THUMB: blx _printArgsNoRet +; THUMB-NOT: ldr +; THUMB-NOT: vldr +; THUMB-NOT: vmov +; THUMB-NOT: ldr +; THUMB-NOT: sxtb +; THUMB: movs r0, #0 +; THUMB: movt r0, #0 +; THUMB: pop + ret i32 0 +} + +declare void @printArgsNoRet(i32 %a1, float %a2, i8 signext %a3, double %a4) diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll new file mode 100644 index 0000000..61bd185 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-fold.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +@a = global i8 1, align 1 +@b = global i16 2, align 2 + +define void @t1() nounwind uwtable ssp { +; ARM: t1 +; ARM: ldrb +; ARM-NOT: uxtb +; THUMB: t1 +; THUMB: ldrb +; THUMB-NOT: uxtb + %1 = load i8* @a, align 1 + call void @foo1(i8 zeroext %1) + ret void +} + +define void @t2() nounwind uwtable ssp { +; ARM: t2 +; ARM: ldrh +; ARM-NOT: uxth +; THUMB: t2 +; THUMB: ldrh +; THUMB-NOT: uxth + %1 = load i16* @b, align 2 + call void @foo2(i16 zeroext %1) + ret void +} + +declare void @foo1(i8 zeroext) +declare void @foo2(i16 zeroext) + +define i32 @t3() nounwind uwtable ssp { +; ARM: t3 +; ARM: ldrb +; ARM-NOT: uxtb +; THUMB: t3 +; THUMB: ldrb +; THUMB-NOT: uxtb + %1 = load i8* @a, align 1 + %2 = zext i8 %1 to i32 + ret i32 %2 +} + +define i32 @t4() nounwind uwtable ssp { +; ARM: t4 +; ARM: ldrh +; ARM-NOT: uxth +; THUMB: t4 +; THUMB: ldrh +; THUMB-NOT: uxth + %1 = load i16* @b, align 2 + %2 = zext i16 %1 to i32 + ret i32 %2 +} + +define i32 @t5() nounwind uwtable ssp { +; ARM: t5 +; ARM: ldrsh +; ARM-NOT: sxth +; THUMB: t5 +; THUMB: ldrsh +; THUMB-NOT: sxth + %1 = load i16* @b, align 2 + %2 = sext i16 %1 to i32 + ret i32 %2 +} + +define i32 @t6() nounwind uwtable ssp { +; ARM: t6 +; ARM: ldrsb +; ARM-NOT: sxtb +; THUMB: t6 +; THUMB: ldrsb +; THUMB-NOT: sxtb + %1 = load i8* @a, align 2 + %2 = sext i8 %1 to i32 + ret i32 %2 +} diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll new file mode 100644 index 0000000..8764bef --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind { +entry: +; ARM: icmp_i16_unsigned +; ARM: uxth r0, r0 +; ARM: uxth r1, r1 +; ARM: cmp r0, r1 +; THUMB: icmp_i16_unsigned +; THUMB: uxth r0, r0 +; THUMB: uxth r1, r1 +; THUMB: cmp r0, r1 + %cmp = icmp ult i16 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind { +entry: +; ARM: icmp_i8_signed +; ARM: sxtb r0, r0 +; ARM: sxtb r1, r1 +; ARM: cmp r0, r1 +; THUMB: icmp_i8_signed +; THUMB: sxtb r0, r0 +; THUMB: sxtb r1, r1 +; THUMB: cmp r0, r1 + %cmp = icmp sgt i8 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind { +entry: +; ARM: icmp_i1_unsigned +; ARM: and r0, r0, #1 +; ARM: and r1, r1, #1 +; ARM: cmp r0, r1 +; THUMB: 
icmp_i1_unsigned +; THUMB: and r0, r0, #1 +; THUMB: and r1, r1, #1 +; THUMB: cmp r0, r1 + %cmp = icmp ult i1 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll new file mode 100644 index 0000000..be8035e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define void @t1(i8* %x) { +entry: +; ARM: t1 +; THUMB: t1 + br label %L0 + +L0: + br label %L1 + +L1: + indirectbr i8* %x, [ label %L0, label %L1 ] +; ARM: bx r0 +; THUMB: mov pc, r0 +} diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll new file mode 100644 index 0000000..e6bdfa7 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -0,0 +1,110 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 +@temp = common global [60 x i8] zeroinitializer, align 1 + +define void @t1() nounwind ssp { +; ARM: t1 +; ARM: movw r0, :lower16:_message1 +; ARM: movt r0, :upper16:_message1 +; ARM: add r0, r0, #5 +; ARM: movw r1, #64 +; ARM: movw r2, #10 +; ARM: uxtb r1, r1 +; ARM: bl _memset +; THUMB: t1 +; THUMB: movw r0, :lower16:_message1 +; THUMB: movt r0, :upper16:_message1 +; THUMB: adds r0, #5 +; THUMB: movs r1, #64 +; THUMB: movt r1, #0 +; THUMB: movs r2, #10 +; THUMB: movt r2, #0 +; THUMB: uxtb r1, r1 +; THUMB: bl _memset + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +define void @t2() nounwind ssp { +; ARM: t2 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: add r1, r0, #4 +; ARM: add r0, r0, #16 +; ARM: movw r2, #17 +; ARM: str r0, [sp] @ 4-byte Spill +; ARM: mov r0, r1 +; ARM: ldr r1, [sp] @ 4-byte Reload +; ARM: bl _memcpy +; THUMB: t2 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: adds r1, r0, #4 +; THUMB: adds r0, #16 +; THUMB: movs r2, #17 +; THUMB: movt r2, #0 +; THUMB: mov r0, r1 +; THUMB: bl _memcpy + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @t3() nounwind ssp { +; ARM: t3 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: add r1, r0, #4 +; ARM: add r0, r0, #16 +; ARM: movw r2, #10 +; ARM: mov r0, r1 +; ARM: bl _memmove +; THUMB: t3 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: adds r1, 
r0, #4 +; THUMB: adds r0, #16 +; THUMB: movs r2, #10 +; THUMB: movt r2, #0 +; THUMB: mov r0, r1 +; THUMB: bl _memmove + call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +} + +define void @t4() nounwind ssp { +; ARM: t4 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: ldr r1, [r0, #16] +; ARM: str r1, [r0, #4] +; ARM: ldr r1, [r0, #20] +; ARM: str r1, [r0, #8] +; ARM: ldrh r1, [r0, #24] +; ARM: strh r1, [r0, #12] +; ARM: bx lr +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: ldr r1, [r0, #16] +; THUMB: str r1, [r0, #4] +; THUMB: ldr r1, [r0, #20] +; THUMB: str r1, [r0, #8] +; THUMB: ldrh r1, [r0, #24] +; THUMB: strh r1, [r0, #12] +; THUMB: bx lr + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll new file mode 100644 index 0000000..dfb8c53 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM + +define i32 @t1(i32* nocapture %ptr) nounwind readonly { +entry: +; ARM: t1 + %add.ptr = getelementptr inbounds i32* %ptr, i32 1 + %0 = load i32* %add.ptr, align 4 +; ARM: ldr r{{[0-9]}}, [r0, #4] + ret i32 %0 +} + +define i32 @t2(i32* nocapture %ptr) nounwind readonly { +entry: +; ARM: t2 + %add.ptr = getelementptr inbounds i32* %ptr, i32 63 + %0 = load i32* %add.ptr, align 4 +; ARM: ldr.w r{{[0-9]}}, [r0, #252] + ret i32 %0 +} + +define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly { +entry: +; ARM: t3 + %add.ptr = getelementptr inbounds i16* %ptr, i16 1 + %0 = load i16* %add.ptr, align 4 +; ARM: ldrh r{{[0-9]}}, [r0, #2] + ret i16 %0 +} + +define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { +entry: +; ARM: t4 + %add.ptr = getelementptr inbounds i16* %ptr, i16 63 + %0 = load i16* %add.ptr, align 4 +; ARM: ldrh.w r{{[0-9]}}, [r0, #126] + ret i16 %0 +} + +define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly { +entry: +; ARM: t5 + %add.ptr = getelementptr inbounds i8* %ptr, i8 1 + %0 = load i8* %add.ptr, align 4 +; ARM: ldrb r{{[0-9]}}, [r0, #1] + ret i8 %0 +} + +define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly { +entry: +; ARM: t6 + %add.ptr = getelementptr inbounds i8* %ptr, i8 63 + %0 = load i8* %add.ptr, align 4 +; ARM: ldrb.w r{{[0-9]}}, [r0, #63] + ret i8 %0 +} diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll new file mode 100644 index 0000000..2a88678 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -0,0 +1,168 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define i32 @t1(i32* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t1 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -1 + %0 = load i32* %add.ptr, align 4, !tbaa !0 +; THUMB: ldr r{{[0-9]}}, [r0, #-4] + ret i32 
%0 +} + +define i32 @t2(i32* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t2 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -63 + %0 = load i32* %add.ptr, align 4, !tbaa !0 +; THUMB: ldr r{{[0-9]}}, [r0, #-252] + ret i32 %0 +} + +define i32 @t3(i32* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t3 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -64 + %0 = load i32* %add.ptr, align 4, !tbaa !0 +; THUMB: ldr r{{[0-9]}}, [r0] + ret i32 %0 +} + +define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t4 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -1 + %0 = load i16* %add.ptr, align 2, !tbaa !3 +; THUMB: ldrh r{{[0-9]}}, [r0, #-2] + ret i16 %0 +} + +define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t5 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -127 + %0 = load i16* %add.ptr, align 2, !tbaa !3 +; THUMB: ldrh r{{[0-9]}}, [r0, #-254] + ret i16 %0 +} + +define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t6 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -128 + %0 = load i16* %add.ptr, align 2, !tbaa !3 +; THUMB: ldrh r{{[0-9]}}, [r0] + ret i16 %0 +} + +define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t7 + %add.ptr = getelementptr inbounds i8* %ptr, i32 -1 + %0 = load i8* %add.ptr, align 1, !tbaa !1 +; THUMB: ldrb r{{[0-9]}}, [r0, #-1] + ret i8 %0 +} + +define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t8 + %add.ptr = getelementptr inbounds i8* %ptr, i32 -255 + %0 = load i8* %add.ptr, align 1, !tbaa !1 +; THUMB: ldrb r{{[0-9]}}, [r0, #-255] + ret i8 %0 +} + +define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { +entry: +; THUMB: t9 + %add.ptr = getelementptr inbounds i8* %ptr, i32 -256 + %0 = load i8* %add.ptr, align 1, !tbaa !1 +; THUMB: ldrb r{{[0-9]}}, [r0] + ret i8 %0 +} + +define void @t10(i32* nocapture %ptr) nounwind { +entry: +; THUMB: t10 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -1 + store i32 0, i32* %add.ptr, align 4, !tbaa !0 +; THUMB: str r{{[0-9]}}, [r0, #-4] + ret void +} + +define void @t11(i32* nocapture %ptr) nounwind { +entry: +; THUMB: t11 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -63 + store i32 0, i32* %add.ptr, align 4, !tbaa !0 +; THUMB: str r{{[0-9]}}, [r0, #-252] + ret void +} + +define void @t12(i32* nocapture %ptr) nounwind { +entry: +; THUMB: t12 + %add.ptr = getelementptr inbounds i32* %ptr, i32 -64 + store i32 0, i32* %add.ptr, align 4, !tbaa !0 +; THUMB: str r{{[0-9]}}, [r0] + ret void +} + +define void @t13(i16* nocapture %ptr) nounwind { +entry: +; THUMB: t13 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -1 + store i16 0, i16* %add.ptr, align 2, !tbaa !3 +; THUMB: strh r{{[0-9]}}, [r0, #-2] + ret void +} + +define void @t14(i16* nocapture %ptr) nounwind { +entry: +; THUMB: t14 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -127 + store i16 0, i16* %add.ptr, align 2, !tbaa !3 +; THUMB: strh r{{[0-9]}}, [r0, #-254] + ret void +} + +define void @t15(i16* nocapture %ptr) nounwind { +entry: +; THUMB: t15 + %add.ptr = getelementptr inbounds i16* %ptr, i32 -128 + store i16 0, i16* %add.ptr, align 2, !tbaa !3 +; THUMB: strh r{{[0-9]}}, [r0] + ret void +} + +define void @t16(i8* nocapture %ptr) nounwind { +entry: +; THUMB: t16 + %add.ptr = getelementptr inbounds i8* %ptr, i32 -1 + store i8 0, i8* %add.ptr, align 1, !tbaa !1 +; THUMB: strb r{{[0-9]}}, [r0, #-1] + ret void +} + +define void @t17(i8* nocapture %ptr) nounwind { +entry: +; THUMB: t17 + 
%add.ptr = getelementptr inbounds i8* %ptr, i32 -255 + store i8 0, i8* %add.ptr, align 1, !tbaa !1 +; THUMB: strb r{{[0-9]}}, [r0, #-255] + ret void +} + +define void @t18(i8* nocapture %ptr) nounwind { +entry: +; THUMB: t18 + %add.ptr = getelementptr inbounds i8* %ptr, i32 -256 + store i8 0, i8* %add.ptr, align 1, !tbaa !1 +; THUMB: strb r{{[0-9]}}, [r0] + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll new file mode 100644 index 0000000..e8cc2b2 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -0,0 +1,149 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; rdar://10418009 + +define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t1 + %add.ptr = getelementptr inbounds i16* %a, i64 -8 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #-16] + ret i16 %0 +} + +define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t2 + %add.ptr = getelementptr inbounds i16* %a, i64 -16 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #-32] + ret i16 %0 +} + +define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t3 + %add.ptr = getelementptr inbounds i16* %a, i64 -127 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #-254] + ret i16 %0 +} + +define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t4 + %add.ptr = getelementptr inbounds i16* %a, i64 -128 + %0 = load i16* %add.ptr, align 2 +; ARM: mvn r{{[1-9]}}, #255 +; ARM: add r0, r0, r{{[1-9]}} +; ARM: ldrh r0, [r0] + ret i16 %0 +} + +define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t5 + %add.ptr = getelementptr inbounds i16* %a, i64 8 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #16] + ret i16 %0 +} + +define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t6 + %add.ptr = getelementptr inbounds i16* %a, i64 16 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #32] + ret i16 %0 +} + +define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t7 + %add.ptr = getelementptr inbounds i16* %a, i64 127 + %0 = load i16* %add.ptr, align 2 +; ARM: ldrh r0, [r0, #254] + ret i16 %0 +} + +define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp { +entry: +; ARM: t8 + %add.ptr = getelementptr inbounds i16* %a, i64 128 + %0 = load i16* %add.ptr, align 2 +; ARM: add r0, r0, #256 +; ARM: ldrh r0, [r0] + ret i16 %0 +} + +define void @t9(i16* nocapture %a) nounwind uwtable ssp { +entry: +; ARM: t9 + %add.ptr = getelementptr inbounds i16* %a, i64 -8 + store i16 0, i16* %add.ptr, align 2 +; ARM: strh r1, [r0, #-16] + ret void +} + +; mvn r1, #255 +; strh r2, [r0, r1] +define void @t10(i16* nocapture %a) nounwind uwtable ssp { +entry: +; ARM: t10 + %add.ptr = getelementptr inbounds i16* %a, i64 -128 + store i16 0, i16* %add.ptr, align 2 +; ARM: mvn r{{[1-9]}}, #255 +; ARM: add r0, r0, r{{[1-9]}} +; ARM: strh r{{[1-9]}}, [r0] + ret void +} + +define void @t11(i16* nocapture %a) nounwind uwtable ssp { +entry: +; ARM: t11 + %add.ptr = getelementptr inbounds i16* %a, i64 8 + store i16 0, i16* %add.ptr, align 2 
+; ARM: strh r{{[1-9]}}, [r0, #16]
+ ret void
+}
+
+; mov r1, #256
+; strh r2, [r0, r1]
+define void @t12(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t12
+ %add.ptr = getelementptr inbounds i16* %a, i64 128
+ store i16 0, i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: strh r{{[1-9]}}, [r0]
+ ret void
+}
+
+define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t13
+ %add.ptr = getelementptr inbounds i8* %a, i64 -8
+ %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-8]
+ ret i8 %0
+}
+
+define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t14
+ %add.ptr = getelementptr inbounds i8* %a, i64 -255
+ %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-255]
+ ret i8 %0
+}
+
+define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t15
+ %add.ptr = getelementptr inbounds i8* %a, i64 -256
+ %0 = load i8* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrsb r0, [r0]
+ ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
new file mode 100644
index 0000000..b180e43
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; rdar://10412592
+
+; Note: The Thumb code is being generated by the target-independent selector.
+
+define void @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+; ARM: mvn r0, #0
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #65535
+ call void @foo(i32 -1)
+ ret void
+}
+
+declare void @foo(i32)
+
+define void @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+; ARM: mvn r0, #233
+; THUMB: movw r0, #65302
+; THUMB: movt r0, #65535
+ call void @foo(i32 -234)
+ ret void
+}
+
+define void @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+; ARM: mvn r0, #256
+; THUMB: movw r0, #65279
+; THUMB: movt r0, #65535
+ call void @foo(i32 -257)
+ ret void
+}
+
+; Load from constant pool
+define void @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+; ARM: ldr r0
+; THUMB: movw r0, #65278
+; THUMB: movt r0, #65535
+ call void @foo(i32 -258)
+ ret void
+}
+
+define void @t5() nounwind {
+entry:
+; ARM: t5
+; THUMB: t5
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+ call void @foo(i32 -65281)
+ ret void
+}
+
+define void @t6() nounwind {
+entry:
+; ARM: t6
+; THUMB: t6
+; ARM: mvn r0, #978944
+; THUMB: movw r0, #4095
+; THUMB: movt r0, #65521
+ call void @foo(i32 -978945)
+ ret void
+}
+
+define void @t7() nounwind {
+entry:
+; ARM: t7
+; THUMB: t7
+; ARM: mvn r0, #267386880
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #61455
+ call void @foo(i32 -267386881)
+ ret void
+}
+
+define void @t8() nounwind {
+entry:
+; ARM: t8
+; THUMB: t8
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+ call void @foo(i32 -65281)
+ ret void
+}
+
+define void @t9() nounwind {
+entry:
+; ARM: t9
+; THUMB: t9
+; ARM: mvn r0, #2130706432
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #33023
+ call void @foo(i32 -2130706433)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 08dcc64..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; 
RUN: llc -O0 -regalloc=linearscan < %s +; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. (It was crashing because a register was getting redefined.) diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll new file mode 100644 index 0000000..689b169 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-ret.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s + +; Sign-extend of i1 currently not supported by fast-isel +;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp { +;entry: +; ret i1 %a +;} + +define zeroext i1 @ret1(i1 signext %a) nounwind uwtable ssp { +entry: +; CHECK: ret1 +; CHECK: and r0, r0, #1 +; CHECK: bx lr + ret i1 %a +} + +define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp { +entry: +; CHECK: ret2 +; CHECK: sxtb r0, r0 +; CHECK: bx lr + ret i8 %a +} + +define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp { +entry: +; CHECK: ret3 +; CHECK: uxtb r0, r0 +; CHECK: bx lr + ret i8 %a +} + +define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp { +entry: +; CHECK: ret4 +; CHECK: sxth r0, r0 +; CHECK: bx lr + ret i16 %a +} + +define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp { +entry: +; CHECK: ret5 +; CHECK: uxth r0, r0 +; CHECK: bx lr + ret i16 %a +} + +define i16 @ret6(i16 %a) nounwind uwtable ssp { +entry: +; CHECK: ret6 +; CHECK-NOT: uxth +; CHECK-NOT: sxth +; CHECK: bx lr + ret i16 %a +} diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll new file mode 100644 index 0000000..b83a733 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define i32 @t1(i1 %c) nounwind readnone { +entry: +; ARM: t1 +; ARM: movw r{{[1-9]}}, #10 +; ARM: cmp r0, #0 +; ARM: moveq r{{[1-9]}}, #20 +; ARM: mov r0, r{{[1-9]}} +; THUMB: t1 +; THUMB: movs r{{[1-9]}}, #10 +; THUMB: movt r{{[1-9]}}, #0 +; THUMB: cmp r0, #0 +; THUMB: it eq +; THUMB: moveq r{{[1-9]}}, #20 +; THUMB: mov r0, r{{[1-9]}} + %0 = select i1 %c, i32 10, i32 20 + ret i32 %0 +} + +define i32 @t2(i1 %c, i32 %a) nounwind readnone { +entry: +; ARM: t2 +; ARM: cmp r0, #0 +; ARM: moveq r{{[1-9]}}, #20 +; ARM: mov r0, r{{[1-9]}} +; THUMB: t2 +; THUMB: cmp r0, #0 +; THUMB: it eq +; THUMB: moveq r{{[1-9]}}, #20 +; THUMB: mov r0, r{{[1-9]}} + %0 = select i1 %c, i32 %a, i32 20 + ret i32 %0 +} + +define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone { +entry: +; ARM: t3 +; ARM: cmp r0, #0 +; ARM: movne r{{[1-9]}}, r{{[1-9]}} +; ARM: mov r0, r{{[1-9]}} +; THUMB: t3 +; THUMB: cmp r0, #0 +; THUMB: it ne +; THUMB: movne r{{[1-9]}}, r{{[1-9]}} +; THUMB: mov r0, r{{[1-9]}} + %0 = select i1 %c, i32 %a, i32 %b + ret i32 %0 +} + +define i32 @t4(i1 %c) nounwind readnone { +entry: +; ARM: t4 +; ARM: mvn r{{[1-9]}}, #9 +; ARM: cmp r0, #0 +; ARM: mvneq r{{[1-9]}}, #0 +; ARM: mov r0, r{{[1-9]}} +; THUMB: t4 +; THUMB: movw r{{[1-9]}}, #65526 +; THUMB: movt r{{[1-9]}}, #65535 +; THUMB: cmp r0, #0 +; THUMB: it eq +; THUMB: mvneq r{{[1-9]}}, #0 +; THUMB: mov r0, r{{[1-9]}} + %0 = select i1 %c, 
i32 -10, i32 -1
+ ret i32 %0
+}
+
+define i32 @t5(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t5
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #1
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t5
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #1
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 -2
+ ret i32 %0
+}
+
+; Check a large negative immediate.
+define i32 @t6(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t6
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #978944
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t6
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #978944
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 -978945
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 465e85f..417e2d9 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Very basic fast-isel functionality.
define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,19 +142,87 @@ define void @test4() {
 store i32 %b, i32* @test4g
 ret void
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
-; ARM: ldr r0, LCPI4_1
-; ARM: ldr r0, [r0]
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
+}
+
+; Check unaligned stores
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+ %add = fadd float %x, %y
+ %0 = load %struct.anon** @a, align 4
+ %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+ store float %add, float* %x1, align 1
+ ret void
+}
+
+; Doublewords require only word-alignment. 
+; rdar://10528060 +%struct.anon.0 = type { double } + +@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4 + +define void @test5(double %a, double %b) nounwind { +entry: +; ARM: @test5 +; THUMB: @test5 + %add = fadd double %a, %b + store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4 +; ARM: vstr d16, [r0] +; THUMB: vstr d16, [r0] + ret void +} + +; Check unaligned loads of floats +%class.TAlignTest = type <{ i16, float }> + +define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 { +entry: +; ARM: @test6 +; THUMB: @test6 + %0 = alloca %class.TAlignTest*, align 4 + store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 + %1 = load %class.TAlignTest** %0 + %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1 + %3 = load float* %2, align 1 + %4 = fcmp une float %3, 0.000000e+00 +; ARM: ldr r0, [r0, #2] +; ARM: vmov s0, r0 +; ARM: vcmpe.f32 s0, #0 +; THUMB: ldr.w r0, [r0, #2] +; THUMB: vmov s0, r0 +; THUMB: vcmpe.f32 s0, #0 + ret i1 %4 +} + +; ARM: @urem_fold +; THUMB: @urem_fold +; ARM: and r0, r0, #31 +; THUMB: and r0, r0, #31 +define i32 @urem_fold(i32 %a) nounwind { + %rem = urem i32 %a, 32 + ret i32 %rem } diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index c4dbeb9..27fa2b0 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT -; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD +; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT +; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD ; rdar://8984306 define float @test1(float %x, float %y) nounwind { @@ -40,28 +40,12 @@ entry: ret double %1 } -; rdar://9059537 -define i32 @test4() ssp { -entry: -; SOFT: test4: -; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00 -; This S-reg must be the first sub-reg of the last D-reg on vbsl. 
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]] -; SOFT: vshr.u64 [[REG4]], [[REG4]], #32 -; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000 -; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}} - %call80 = tail call double @copysign(double 1.000000e+00, double undef) - %conv81 = fptrunc double %call80 to float - %tmp88 = bitcast float %conv81 to i32 - ret i32 %tmp88 -} - ; rdar://9287902 -define float @test5() nounwind { +define float @test4() nounwind { entry: -; SOFT: test5: -; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 +; SOFT: test4: ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 ; SOFT: vbsl [[REG6]], [[REG7]], %0 = tail call double (...)* @bar() nounwind diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll index 227e4e8..1ba561d 100644 --- a/test/CodeGen/ARM/fold-const.ll +++ b/test/CodeGen/ARM/fold-const.ll @@ -3,7 +3,7 @@ define i32 @f(i32 %a) nounwind readnone optsize ssp { entry: %conv = zext i32 %a to i64 - %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv) + %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv, i1 true) ; CHECK: clz ; CHECK-NOT: adds %cast = trunc i64 %tmp1 to i32 @@ -11,4 +11,4 @@ entry: ret i32 %sub } -declare i64 @llvm.ctlz.i64(i64) nounwind readnone +declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index ac023d1..93601cf 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -42,7 +42,7 @@ entry: define double @h(double* %v) { ;CHECK: h: -;CHECK: vldr.64 +;CHECK: vldr ;CHECK-NEXT: vmov entry: %tmp = load double* %v ; <double> [#uses=1] diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 7c0dd0e..2d8f710 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,24 +1,16 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s ; rdar://7461510 +; rdar://10964603 +; Disable this optimization unless we know one of them is zero. 
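; Editorial sketch (hypothetical function, not part of this commit) of
; the bit-level reasoning: +0.0 is 0x00000000 and -0.0 is 0x80000000, so
; clearing the sign bit and testing the remaining magnitude for zero is
; exactly 'fcmp oeq float %x, 0.0' (NaNs have a nonzero magnitude and
; correctly compare false). This is the integer form that the
; bfc/mvn/tst CHECK lines below are matching.
define i1 @zero_cmp_sketch(float %x) nounwind readnone {
entry:
  %bits = bitcast float %x to i32
  %mag = and i32 %bits, 2147483647    ; mask off the sign bit (0x7FFFFFFF)
  %cmp = icmp eq i32 %mag, 0          ; true for both +0.0 and -0.0
  ret i1 %cmp
}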
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr.32 s0,
-; NAN: vldr.32 s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: beq
 %0 = load float* %a
 %1 = load float* %b
 %2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
 ret i32 %4
}
+; If one side is zero, the other side's sign bit is masked off to allow
+; +0.0 == -0.0
define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
 %0 = load double* %a
 %1 = fcmp oeq double %0, 0.000000e+00
 br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
 %0 = load float* %a
 %1 = fcmp oeq float %0, 0.000000e+00
 br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 2e6b3e3..4a4c5b1 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -5,7 +5,7 @@ define i32 @f7(float %a, float %b) {
entry:
; CHECK: f7:
; CHECK: vcmpe.f32
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: movweq
; CHECK-NOT: vmrs
; CHECK: movwvs
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 3833933..8faa578 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
define float @f2(float* %v, float %u) {
; CHECK: f2:
-; CHECK: vldr.32{{.*}}[
+; CHECK: vldr{{.*}}[
 %tmp = load float* %v ; <float> [#uses=1]
 %tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
 ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
define float @f2offset(float* %v, float %u) {
; CHECK: f2offset:
-; CHECK: vldr.32{{.*}}, #4]
+; CHECK: vldr{{.*}}, #4]
 %addr = getelementptr float* %v, i32 1
 %tmp = load float* %addr
 %tmp1 = fadd float %tmp, %u
@@ -25,7 +25,7 @@ define float @f2offset(float* %v, float %u) {
define float @f2noffset(float* %v, float %u) {
; CHECK: f2noffset:
-; CHECK: vldr.32{{.*}}, #-4]
+; CHECK: vldr{{.*}}, #-4]
 %addr = getelementptr float* %v, i32 -1
 %tmp = load float* %addr
 %tmp1 = fadd float %tmp, %u
@@ -34,7 +34,7 @@ define float @f2noffset(float* %v, float %u) {
define void @f3(float %a, float %b, float* %v) {
; CHECK: f3:
-; CHECK: vstr.32{{.*}}[
+; CHECK: vstr{{.*}}[
 %tmp = fadd float %a, %b ; <float> [#uses=1]
 store float %tmp, float* %v
 ret void
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 
0000000..a8b3999 --- /dev/null +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -0,0 +1,185 @@ +; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s +; Check generated fused MAC and MLS. + +define double @fusedMACTest1(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest1: +;CHECK: vfma.f64 + %1 = fmul double %d1, %d2 + %2 = fadd double %1, %d3 + ret double %2 +} + +define float @fusedMACTest2(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest2: +;CHECK: vfma.f32 + %1 = fmul float %f1, %f2 + %2 = fadd float %1, %f3 + ret float %2 +} + +define double @fusedMACTest3(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest3: +;CHECK: vfms.f64 + %1 = fmul double %d2, %d3 + %2 = fsub double %d1, %1 + ret double %2 +} + +define float @fusedMACTest4(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest4: +;CHECK: vfms.f32 + %1 = fmul float %f2, %f3 + %2 = fsub float %f1, %1 + ret float %2 +} + +define double @fusedMACTest5(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest5: +;CHECK: vfnma.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double -0.0, %1 + %3 = fsub double %2, %d3 + ret double %3 +} + +define float @fusedMACTest6(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest6: +;CHECK: vfnma.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float -0.0, %1 + %3 = fsub float %2, %f3 + ret float %3 +} + +define double @fusedMACTest7(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest7: +;CHECK: vfnms.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double %1, %d3 + ret double %2 +} + +define float @fusedMACTest8(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest8: +;CHECK: vfnms.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float %1, %f3 + ret float %2 +} + +define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest9: +;CHECK: vfma.f32 + %mul = fmul <2 x float> %a, %b + %add = fadd <2 x float> %mul, %a + ret <2 x float> %add +} + +define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest10: +;CHECK: vfms.f32 + %mul = fmul <2 x float> %a, %b + %sub = fsub <2 x float> %a, %mul + ret <2 x float> %sub +} + +define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest11: +;CHECK: vfma.f32 + %mul = fmul <4 x float> %a, %b + %add = fadd <4 x float> %mul, %a + ret <4 x float> %add +} + +define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest12: +;CHECK: vfms.f32 + %mul = fmul <4 x float> %a, %b + %sub = fsub <4 x float> %a, %mul + ret <4 x float> %sub +} + +define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: +; CHECK: test_fma_f32 +; CHECK: vfma.f32 + %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %tmp1 +} + +define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fma_f64 +; CHECK: vfma.f64 + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %tmp1 +} + +define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { +entry: +; CHECK: test_fma_v2f32 +; CHECK: vfma.f32 + %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind + ret <2 x float> %tmp1 +} + +define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fms_f64 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, 
double %b, double %c) nounwind readnone + ret double %tmp2 +} + +define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fms_f64_2 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64_2 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + ret double %tmp2 +} + +define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f64 +; CHECK: vfnma.f64 + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + %tmp2 = fsub double -0.0, %tmp1 + ret double %tmp2 +} + +define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f64_2 +; CHECK: vfnma.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = fsub double -0.0, %c + %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone + ret double %tmp3 +} + +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll index 28bf221..1732df3 100644 --- a/test/CodeGen/ARM/global-merge.ll +++ b/test/CodeGen/ARM/global-merge.ll @@ -14,7 +14,7 @@ ; offset. Having the starting offset in range is not sufficient. ; When this works properly, @g3 is placed in a separate chunk of merged globals. ; CHECK: _MergedGlobals1: -@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ] +@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4 ; Global variables that can be placed in BSS should be kept together in a ; separate pool of merged globals. 
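; Editorial sketch (hypothetical names, not part of this commit) of the
; transformation itself: small internal globals are wrapped into a single
; struct so that one movw/movt of the base address reaches all of them
; through constant offsets.
@gm_x = internal global i32 0, align 4
@gm_y = internal global i32 0, align 4
; Conceptually the pair becomes the following, with uses of @gm_x and
; @gm_y rewritten as getelementptrs off the merged symbol:
@_MergedGlobals_sketch = internal global { i32, i32 } zeroinitializer, align 4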
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll index 5e7e3f2..eb71149 100644 --- a/test/CodeGen/ARM/globals.ll +++ b/test/CodeGen/ARM/globals.ll @@ -70,6 +70,5 @@ define i32 @test1() { ; LinuxPIC: .align 2 ; LinuxPIC: .LCPI0_0: ; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8) -; LinuxPIC: .align 2 ; LinuxPIC: .LCPI0_1: ; LinuxPIC: .long G(GOT) diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index 9f46ae0..893b426 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \ ; RUN: grep mov | count 2 -; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2 +; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2 @str = internal constant [12 x i8] c"Hello World\00" diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index b073a05..cd870bb 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,15 +1,17 @@ -; RUN: llc < %s -march=arm -mattr=+v4t -; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s define i32 @t1(i32 %a, i32 %b) { +; CHECK: t1: %tmp2 = icmp eq i32 %a, 0 br i1 %tmp2, label %cond_false, label %cond_true cond_true: +; CHECK: subeq r0, r1, #1 %tmp5 = add i32 %b, 1 ret i32 %tmp5 cond_false: +; CHECK: addne r0, r1, #1 %tmp7 = add i32 %b, -1 ret i32 %tmp7 } diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 18f87bf..a5082d8 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8402126 ; Make sure if-converter is not predicating vldmia and ldmia. 
These are ; micro-coded and would have long issue latency even if predicated on diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll index 63f8557..0f142ee 100644 --- a/test/CodeGen/ARM/ifcvt11.ll +++ b/test/CodeGen/ARM/ifcvt11.ll @@ -18,7 +18,7 @@ bb.nph: ; preds = %entry bb: ; preds = %bb4, %bb.nph ; CHECK: vcmpe.f64 -; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: vmrs APSR_nzcv, fpscr %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ] %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ] %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0 @@ -33,7 +33,7 @@ bb1: ; preds = %bb ; CHECK-NOT: vcmpemi ; CHECK-NOT: vmrsmi ; CHECK: vcmpe.f64 -; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: vmrs APSR_nzcv, fpscr %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2 %6 = load double* %scevgep12, align 4 %7 = fcmp uge double %3, %6 diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index 3e2c578..eef4de0 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -1,14 +1,19 @@ -; RUN: llc < %s -march=arm -mattr=+v4t +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t1: +; CHECK: cmp r2, #1 +; CHECK: cmpne r2, #7 switch i32 %c, label %cond_next [ i32 1, label %cond_true i32 7, label %cond_true ] cond_true: +; CHECK: addne r0 +; CHECK: bxne %tmp12 = add i32 %a, 1 %tmp1518 = add i32 %tmp12, %b ret i32 %tmp1518 diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 3615055..95f5c97 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s @x = external global i32* ; <i32**> [#uses=1] diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 2327657..a00deda 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index cb5243c..2fcc45f 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -110,3 +110,13 @@ entry: call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind ret void } + +; Radar 10551006 + +define <4 x i32> @t11(i32* %p) nounwind { +entry: +; CHECK: t11 +; CHECK: vld1.s32 {d16[], d17[]}, [r0] + %0 = tail call <4 x i32> asm "vld1.s32 {${0:e}[], ${0:f}[]}, [$1]", "=w,r"(i32* %p) nounwind + ret <4 x i32> %0 +} diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index 1d32322..d188fae 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+v6 -; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\ ; RUN: grep mov | count 3 define i32 @test(i32 %x) { diff --git a/test/CodeGen/ARM/ldrd-memoper.ll b/test/CodeGen/ARM/ldrd-memoper.ll new file mode 100644 index 0000000..f1a1121 --- /dev/null +++ b/test/CodeGen/ARM/ldrd-memoper.ll @@ -0,0 +1,15 @@ +; RUN: llc %s -o /dev/null -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -debug-only=arm-ldst-opt 2> %t +; RUN: FileCheck %s < %t +; REQUIRES: asserts +; PR8113: 
ARMLoadStoreOptimizer must preserve memoperands.
+
+@b = external global i64*
+
+; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 8010f20..3f8fd75 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,24 +1,70 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI
-; rdar://r6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
-; Magic ARM pair hints works best with linearscan.
+; Magic ARM pair hints work best with linearscan / fast.
+
+; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
+; register when interrupted or faulted.
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
-;V6: ldrd r2, r3, [r2]
-
-;V5: ldr r{{[0-9]+}}, [r2]
-;V5: ldr r{{[0-9]+}}, [r2, #4]
+; A8: t:
+; A8: ldrd r2, r3, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2, #4]
+; M3: t:
+; M3-NOT: ldrd
+; M3: ldm.w r2, {r2, r3}
 %0 = load i64** @b, align 4
 %1 = load i64* %0, align 4
 %2 = mul i64 %1, %a
 ret i64 %2
}
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generates a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability, whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather than
+; evict another live range or use callee-saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case. 
+; +; BASIC: @f +; BASIC: %bb +; BASIC: ldrd +; BASIC: str +; GREEDY: @f +; GREEDY: %bb +; GREEDY: ldrd +; GREEDY: str +define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind { +entry: + %0 = add nsw i32 %n, -1 ; <i32> [#uses=2] + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3] + %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1] + %tmp = add i32 %i.03, 1 ; <i32> [#uses=3] + %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %2 ; <i32> [#uses=1] + store i32 %4, i32* %scevgep4, align 4 + %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg new file mode 100644 index 0000000..cb77b09 --- /dev/null +++ b/test/CodeGen/ARM/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll new file mode 100644 index 0000000..bdd4081 --- /dev/null +++ b/test/CodeGen/ARM/load_i1_select.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios0.0.0" + +; Codegen should only compare one bit of the loaded value. +; rdar://10887484 + +; CHECK: foo: +; CHECK: ldrb r[[R0:[0-9]+]], [r0] +; CHECK: tst.w r[[R0]], #1 +define void @foo(i8* %call, double* %p) nounwind { +entry: + %tmp2 = load i8* %call + %tmp3 = trunc i8 %tmp2 to i1 + %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00 + store double %cond, double* %p + ret void +} diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll new file mode 100644 index 0000000..8068abd --- /dev/null +++ b/test/CodeGen/ARM/log2_not_readnone.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s + +; Log2 and exp2 are string-matched to intrinsics. If they are not declared +; readnone, they can't be changed to intrinsics (because they can change errno). 
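; Editorial contrast (not part of this commit): a declaration carrying an
; explicit readnone is the case where the string-matching transformation
; WOULD be legal, since the attribute promises no errno write:
;
;   declare double @log2(double) readnone   ; may become @llvm.log2.f64
;
; The declarations below deliberately omit the attribute.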
+ +declare double @log2(double) +declare double @exp2(double) + +define void @f() { + ; CHECK: bl log2 + %1 = call double @log2(double 0.000000e+00) + ; CHECK: bl exp2 + %2 = call double @exp2(double 0.000000e+00) + ret void +} diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index d5aac2e..a99a7ec 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -25,8 +25,8 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 ; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: asrge r0, r1, r2 %a = ashr i64 %x, %y %b = trunc i64 %a to i32 @@ -38,8 +38,8 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 ; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: lsrge r0, r1, r2 %a = lshr i64 %x, %y %b = trunc i64 %a to i32 diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll new file mode 100644 index 0000000..5283f57 --- /dev/null +++ b/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s + +; LSR should compare against the post-incremented induction variable. +; In this case, the immediate value is -2 which requires a cmn instruction. +; +; CHECK: f: +; CHECK: %for.body +; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2 +; CHECK: cmn{{.*}}[[IV]], #2 +; CHECK: bne +define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp { +entry: + %cmp3 = icmp eq i32 %i, -2 + br i1 %cmp3, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ] + %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ] + %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05 + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, %b.04 + %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04 + %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06 + %sub = add nsw i32 %i.addr.05, -2 + %cmp = icmp eq i32 %i.addr.05, 0 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ] + ret i32 %bi.0.lcssa +} diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll deleted file mode 100644 index 4737901..0000000 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ /dev/null @@ -1,640 +0,0 @@ -; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -enable-lsr-nested < %s | FileCheck %s - -; LSR should recognize that this is an unrolled loop which can use -; constant offset addressing, so that each of the following stores -; uses the same register. - -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] - -; We can also save a register in the outer loop, but that requires -; performing LSR on the outer loop. 
- -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" - -%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* } -%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 } -%2 = type { %1*, %3*, %6*, i8*, i32, i32 } -%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 } -%4 = type opaque -%5 = type opaque -%6 = type { void (%2*)*, i32, i32, i32, i32 } -%7 = type { [8 x i32], [12 x i32] } -%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* } -%9 = type { [64 x i16], i32 } -%10 = type { [17 x i8], [256 x i8], i32 } -%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* } -%12 = type { %12*, i8, i32, i32, i8* } -%13 = type { void (%0*)*, void (%0*)*, i32 } -%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* } -%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** } -%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* } -%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 } -%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 } -%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 } -%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] } -%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 } -%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* } -%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* } - -define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind { -bb: - %t = alloca [64 x float], align 4 - %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65 - %t6 = load i8** %t5, align 4 - %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20 - %t8 = load i8** %t7, align 4 - br label %bb9 - -bb9: - %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ] - %t11 = add i32 %t10, 8 - %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11 - %t13 = add i32 %t10, 16 - %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13 - %t15 = add i32 %t10, 24 - %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15 - %t17 = add i32 %t10, 32 - %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17 - %t19 = add i32 %t10, 40 - %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19 - %t21 = add i32 %t10, 48 - %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21 - %t23 = add i32 %t10, 56 - %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23 - %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10 - %t26 = shl i32 %t10, 5 - %t27 = or i32 %t26, 8 - %t28 = getelementptr i8* %t8, i32 %t27 - %t29 = bitcast i8* 
%t28 to float* - %t30 = or i32 %t26, 16 - %t31 = getelementptr i8* %t8, i32 %t30 - %t32 = bitcast i8* %t31 to float* - %t33 = or i32 %t26, 24 - %t34 = getelementptr i8* %t8, i32 %t33 - %t35 = bitcast i8* %t34 to float* - %t36 = or i32 %t26, 4 - %t37 = getelementptr i8* %t8, i32 %t36 - %t38 = bitcast i8* %t37 to float* - %t39 = or i32 %t26, 12 - %t40 = getelementptr i8* %t8, i32 %t39 - %t41 = bitcast i8* %t40 to float* - %t42 = or i32 %t26, 20 - %t43 = getelementptr i8* %t8, i32 %t42 - %t44 = bitcast i8* %t43 to float* - %t45 = or i32 %t26, 28 - %t46 = getelementptr i8* %t8, i32 %t45 - %t47 = bitcast i8* %t46 to float* - %t48 = getelementptr i8* %t8, i32 %t26 - %t49 = bitcast i8* %t48 to float* - %t50 = shl i32 %t10, 3 - %t51 = or i32 %t50, 1 - %t52 = getelementptr i16* %a2, i32 %t51 - %t53 = or i32 %t50, 2 - %t54 = getelementptr i16* %a2, i32 %t53 - %t55 = or i32 %t50, 3 - %t56 = getelementptr i16* %a2, i32 %t55 - %t57 = or i32 %t50, 4 - %t58 = getelementptr i16* %a2, i32 %t57 - %t59 = or i32 %t50, 5 - %t60 = getelementptr i16* %a2, i32 %t59 - %t61 = or i32 %t50, 6 - %t62 = getelementptr i16* %a2, i32 %t61 - %t63 = or i32 %t50, 7 - %t64 = getelementptr i16* %a2, i32 %t63 - %t65 = getelementptr i16* %a2, i32 %t50 - %t66 = load i16* %t52, align 2 - %t67 = icmp eq i16 %t66, 0 - %t68 = load i16* %t54, align 2 - %t69 = icmp eq i16 %t68, 0 - %t70 = and i1 %t67, %t69 - br i1 %t70, label %bb71, label %bb91 - -bb71: - %t72 = load i16* %t56, align 2 - %t73 = icmp eq i16 %t72, 0 - br i1 %t73, label %bb74, label %bb91 - -bb74: - %t75 = load i16* %t58, align 2 - %t76 = icmp eq i16 %t75, 0 - br i1 %t76, label %bb77, label %bb91 - -bb77: - %t78 = load i16* %t60, align 2 - %t79 = icmp eq i16 %t78, 0 - br i1 %t79, label %bb80, label %bb91 - -bb80: - %t81 = load i16* %t62, align 2 - %t82 = icmp eq i16 %t81, 0 - br i1 %t82, label %bb83, label %bb91 - -bb83: - %t84 = load i16* %t64, align 2 - %t85 = icmp eq i16 %t84, 0 - br i1 %t85, label %bb86, label %bb91 - -bb86: - %t87 = load i16* %t65, align 2 - %t88 = sitofp i16 %t87 to float - %t89 = load float* %t49, align 4 - %t90 = fmul float %t88, %t89 - store float %t90, float* %t25, align 4 - store float %t90, float* %t12, align 4 - store float %t90, float* %t14, align 4 - store float %t90, float* %t16, align 4 - store float %t90, float* %t18, align 4 - store float %t90, float* %t20, align 4 - store float %t90, float* %t22, align 4 - store float %t90, float* %t24, align 4 - br label %bb156 - -bb91: - %t92 = load i16* %t65, align 2 - %t93 = sitofp i16 %t92 to float - %t94 = load float* %t49, align 4 - %t95 = fmul float %t93, %t94 - %t96 = sitofp i16 %t68 to float - %t97 = load float* %t29, align 4 - %t98 = fmul float %t96, %t97 - %t99 = load i16* %t58, align 2 - %t100 = sitofp i16 %t99 to float - %t101 = load float* %t32, align 4 - %t102 = fmul float %t100, %t101 - %t103 = load i16* %t62, align 2 - %t104 = sitofp i16 %t103 to float - %t105 = load float* %t35, align 4 - %t106 = fmul float %t104, %t105 - %t107 = fadd float %t95, %t102 - %t108 = fsub float %t95, %t102 - %t109 = fadd float %t98, %t106 - %t110 = fsub float %t98, %t106 - %t111 = fmul float %t110, 0x3FF6A09E60000000 - %t112 = fsub float %t111, %t109 - %t113 = fadd float %t107, %t109 - %t114 = fsub float %t107, %t109 - %t115 = fadd float %t108, %t112 - %t116 = fsub float %t108, %t112 - %t117 = sitofp i16 %t66 to float - %t118 = load float* %t38, align 4 - %t119 = fmul float %t117, %t118 - %t120 = load i16* %t56, align 2 - %t121 = sitofp i16 %t120 to float - %t122 = load float* %t41, align 4 - %t123 = fmul 
float %t121, %t122 - %t124 = load i16* %t60, align 2 - %t125 = sitofp i16 %t124 to float - %t126 = load float* %t44, align 4 - %t127 = fmul float %t125, %t126 - %t128 = load i16* %t64, align 2 - %t129 = sitofp i16 %t128 to float - %t130 = load float* %t47, align 4 - %t131 = fmul float %t129, %t130 - %t132 = fadd float %t127, %t123 - %t133 = fsub float %t127, %t123 - %t134 = fadd float %t119, %t131 - %t135 = fsub float %t119, %t131 - %t136 = fadd float %t134, %t132 - %t137 = fsub float %t134, %t132 - %t138 = fmul float %t137, 0x3FF6A09E60000000 - %t139 = fadd float %t133, %t135 - %t140 = fmul float %t139, 0x3FFD906BC0000000 - %t141 = fmul float %t135, 0x3FF1517A80000000 - %t142 = fsub float %t141, %t140 - %t143 = fmul float %t133, 0xC004E7AEA0000000 - %t144 = fadd float %t143, %t140 - %t145 = fsub float %t144, %t136 - %t146 = fsub float %t138, %t145 - %t147 = fadd float %t142, %t146 - %t148 = fadd float %t113, %t136 - store float %t148, float* %t25, align 4 - %t149 = fsub float %t113, %t136 - store float %t149, float* %t24, align 4 - %t150 = fadd float %t115, %t145 - store float %t150, float* %t12, align 4 - %t151 = fsub float %t115, %t145 - store float %t151, float* %t22, align 4 - %t152 = fadd float %t116, %t146 - store float %t152, float* %t14, align 4 - %t153 = fsub float %t116, %t146 - store float %t153, float* %t20, align 4 - %t154 = fadd float %t114, %t147 - store float %t154, float* %t18, align 4 - %t155 = fsub float %t114, %t147 - store float %t155, float* %t16, align 4 - br label %bb156 - -bb156: - %t157 = add i32 %t10, 1 - %t158 = icmp eq i32 %t157, 8 - br i1 %t158, label %bb159, label %bb9 - -bb159: - %t160 = add i32 %a4, 7 - %t161 = add i32 %a4, 1 - %t162 = add i32 %a4, 6 - %t163 = add i32 %a4, 2 - %t164 = add i32 %a4, 5 - %t165 = add i32 %a4, 4 - %t166 = add i32 %a4, 3 - br label %bb167 - -bb167: - %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ] - %t169 = getelementptr i8** %a3, i32 %t168 - %t170 = shl i32 %t168, 3 - %t171 = or i32 %t170, 4 - %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171 - %t173 = or i32 %t170, 2 - %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173 - %t175 = or i32 %t170, 6 - %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175 - %t177 = or i32 %t170, 5 - %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177 - %t179 = or i32 %t170, 3 - %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179 - %t181 = or i32 %t170, 1 - %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181 - %t183 = or i32 %t170, 7 - %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183 - %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170 - %t186 = load i8** %t169, align 4 - %t187 = getelementptr inbounds i8* %t186, i32 %a4 - %t188 = load float* %t185, align 4 - %t189 = load float* %t172, align 4 - %t190 = fadd float %t188, %t189 - %t191 = fsub float %t188, %t189 - %t192 = load float* %t174, align 4 - %t193 = load float* %t176, align 4 - %t194 = fadd float %t192, %t193 - %t195 = fsub float %t192, %t193 - %t196 = fmul float %t195, 0x3FF6A09E60000000 - %t197 = fsub float %t196, %t194 - %t198 = fadd float %t190, %t194 - %t199 = fsub float %t190, %t194 - %t200 = fadd float %t191, %t197 - %t201 = fsub float %t191, %t197 - %t202 = load float* %t178, align 4 - %t203 = load float* %t180, align 4 - %t204 = fadd float %t202, %t203 - %t205 = fsub float %t202, %t203 - %t206 = load float* %t182, align 4 - %t207 = load float* %t184, align 4 - %t208 = fadd float %t206, %t207 - %t209 = fsub float %t206, %t207 - %t210 = fadd float %t208, %t204 - %t211 = fsub float %t208, 
%t204 - %t212 = fmul float %t211, 0x3FF6A09E60000000 - %t213 = fadd float %t205, %t209 - %t214 = fmul float %t213, 0x3FFD906BC0000000 - %t215 = fmul float %t209, 0x3FF1517A80000000 - %t216 = fsub float %t215, %t214 - %t217 = fmul float %t205, 0xC004E7AEA0000000 - %t218 = fadd float %t217, %t214 - %t219 = fsub float %t218, %t210 - %t220 = fsub float %t212, %t219 - %t221 = fadd float %t216, %t220 - %t222 = fadd float %t198, %t210 - %t223 = fptosi float %t222 to i32 - %t224 = add nsw i32 %t223, 4 - %t225 = lshr i32 %t224, 3 - %t226 = and i32 %t225, 1023 - %t227 = add i32 %t226, 128 - %t228 = getelementptr inbounds i8* %t6, i32 %t227 - %t229 = load i8* %t228, align 1 - store i8 %t229, i8* %t187, align 1 - %t230 = fsub float %t198, %t210 - %t231 = fptosi float %t230 to i32 - %t232 = add nsw i32 %t231, 4 - %t233 = lshr i32 %t232, 3 - %t234 = and i32 %t233, 1023 - %t235 = add i32 %t234, 128 - %t236 = getelementptr inbounds i8* %t6, i32 %t235 - %t237 = load i8* %t236, align 1 - %t238 = getelementptr inbounds i8* %t186, i32 %t160 - store i8 %t237, i8* %t238, align 1 - %t239 = fadd float %t200, %t219 - %t240 = fptosi float %t239 to i32 - %t241 = add nsw i32 %t240, 4 - %t242 = lshr i32 %t241, 3 - %t243 = and i32 %t242, 1023 - %t244 = add i32 %t243, 128 - %t245 = getelementptr inbounds i8* %t6, i32 %t244 - %t246 = load i8* %t245, align 1 - %t247 = getelementptr inbounds i8* %t186, i32 %t161 - store i8 %t246, i8* %t247, align 1 - %t248 = fsub float %t200, %t219 - %t249 = fptosi float %t248 to i32 - %t250 = add nsw i32 %t249, 4 - %t251 = lshr i32 %t250, 3 - %t252 = and i32 %t251, 1023 - %t253 = add i32 %t252, 128 - %t254 = getelementptr inbounds i8* %t6, i32 %t253 - %t255 = load i8* %t254, align 1 - %t256 = getelementptr inbounds i8* %t186, i32 %t162 - store i8 %t255, i8* %t256, align 1 - %t257 = fadd float %t201, %t220 - %t258 = fptosi float %t257 to i32 - %t259 = add nsw i32 %t258, 4 - %t260 = lshr i32 %t259, 3 - %t261 = and i32 %t260, 1023 - %t262 = add i32 %t261, 128 - %t263 = getelementptr inbounds i8* %t6, i32 %t262 - %t264 = load i8* %t263, align 1 - %t265 = getelementptr inbounds i8* %t186, i32 %t163 - store i8 %t264, i8* %t265, align 1 - %t266 = fsub float %t201, %t220 - %t267 = fptosi float %t266 to i32 - %t268 = add nsw i32 %t267, 4 - %t269 = lshr i32 %t268, 3 - %t270 = and i32 %t269, 1023 - %t271 = add i32 %t270, 128 - %t272 = getelementptr inbounds i8* %t6, i32 %t271 - %t273 = load i8* %t272, align 1 - %t274 = getelementptr inbounds i8* %t186, i32 %t164 - store i8 %t273, i8* %t274, align 1 - %t275 = fadd float %t199, %t221 - %t276 = fptosi float %t275 to i32 - %t277 = add nsw i32 %t276, 4 - %t278 = lshr i32 %t277, 3 - %t279 = and i32 %t278, 1023 - %t280 = add i32 %t279, 128 - %t281 = getelementptr inbounds i8* %t6, i32 %t280 - %t282 = load i8* %t281, align 1 - %t283 = getelementptr inbounds i8* %t186, i32 %t165 - store i8 %t282, i8* %t283, align 1 - %t284 = fsub float %t199, %t221 - %t285 = fptosi float %t284 to i32 - %t286 = add nsw i32 %t285, 4 - %t287 = lshr i32 %t286, 3 - %t288 = and i32 %t287, 1023 - %t289 = add i32 %t288, 128 - %t290 = getelementptr inbounds i8* %t6, i32 %t289 - %t291 = load i8* %t290, align 1 - %t292 = getelementptr inbounds i8* %t186, i32 %t166 - store i8 %t291, i8* %t292, align 1 - %t293 = add nsw i32 %t168, 1 - %t294 = icmp eq i32 %t293, 8 - br i1 %t294, label %bb295, label %bb167 - -bb295: - ret void -} - -%struct.ct_data_s = type { %union.anon, %union.anon } -%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 } 
-%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 } -%struct.static_tree_desc = type { i32 } -%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* } -%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 } -%union.anon = type { i16 } - -define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize { -entry: - %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1] - %1 = load i32* %0, align 4 ; <i32> [#uses=2] - %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1] - %3 = load i8** %2, align 4 ; <i8*> [#uses=27] - %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1] - %5 = load i32* %4, align 4 ; <i32> [#uses=17] - %6 = getelementptr inbounds i8* %3, i32 %5 ; <i8*> [#uses=1] - %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1] - %8 = load i32* %7, align 4 ; <i32> [#uses=4] - %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1] - %10 = load i32* %9, align 4 ; <i32> [#uses=2] - %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1] - %12 = load i32* %11, align 4 ; <i32> [#uses=2] - %13 = add i32 %12, -262 ; <i32> [#uses=1] - %14 = icmp ugt i32 %5, %13 ; <i1> [#uses=1] - br i1 %14, label %bb, label %bb2 - -bb: ; preds = %entry - %15 = add i32 %5, 262 ; <i32> [#uses=1] - %16 = sub i32 %15, %12 ; <i32> [#uses=1] - br label %bb2 - -bb2: ; preds = %bb, %entry - %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1] - %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1] - %18 = load i16** %17, align 4 ; <i16*> [#uses=1] - %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1] - %20 = load i32* %19, align 4 ; <i32> [#uses=1] - %.sum = add i32 %5, 258 ; <i32> [#uses=2] - %21 = getelementptr inbounds i8* %3, i32 %.sum ; <i8*> [#uses=1] - %22 = add nsw i32 %5, -1 ; <i32> [#uses=1] - %.sum30 = add i32 %22, %8 ; <i32> [#uses=1] - %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1] - %24 = load i8* %23, align 1 ; <i8> [#uses=1] - %.sum31 = add i32 %8, %5 ; <i32> [#uses=1] - %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1] - %26 = load i8* %25, align 1 ; <i8> [#uses=1] - %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1] - %28 = load i32* %27, align 4 ; <i32> [#uses=1] - %29 = lshr i32 %1, 2 ; <i32> [#uses=1] - %30 = icmp ult i32 %8, %28 ; <i1> [#uses=1] - %. 
= select i1 %30, i32 %1, i32 %29 ; <i32> [#uses=1] - %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1] - %32 = load i32* %31, align 4 ; <i32> [#uses=4] - %33 = icmp ugt i32 %10, %32 ; <i1> [#uses=1] - %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1] - %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1] - %35 = ptrtoint i8* %21 to i32 ; <i32> [#uses=1] - %36 = add nsw i32 %5, 257 ; <i32> [#uses=1] - %tmp81 = add i32 %., -1 ; <i32> [#uses=1] - br label %bb6 - -bb6: ; preds = %bb24, %bb2 - %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2] - %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8] - %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6] - %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14] - %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6] - %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1] - %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1] - %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1] - %39 = load i8* %38, align 1 ; <i8> [#uses=1] - %40 = icmp eq i8 %39, %scan_end.1 ; <i1> [#uses=1] - br i1 %40, label %bb7, label %bb23 - -bb7: ; preds = %bb6 - %41 = add nsw i32 %best_len.2, -1 ; <i32> [#uses=1] - %.sum33 = add i32 %41, %cur_match_addr.0 ; <i32> [#uses=1] - %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1] - %43 = load i8* %42, align 1 ; <i8> [#uses=1] - %44 = icmp eq i8 %43, %scan_end1.1 ; <i1> [#uses=1] - br i1 %44, label %bb8, label %bb23 - -bb8: ; preds = %bb7 - %45 = load i8* %37, align 1 ; <i8> [#uses=1] - %46 = load i8* %6, align 1 ; <i8> [#uses=1] - %47 = icmp eq i8 %45, %46 ; <i1> [#uses=1] - br i1 %47, label %bb9, label %bb23 - -bb9: ; preds = %bb8 - %.sum34 = add i32 %cur_match_addr.0, 1 ; <i32> [#uses=1] - %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1] - %49 = load i8* %48, align 1 ; <i8> [#uses=1] - %.sum88 = add i32 %5, 1 ; <i32> [#uses=1] - %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1] - %51 = load i8* %50, align 1 ; <i8> [#uses=1] - %52 = icmp eq i8 %49, %51 ; <i1> [#uses=1] - br i1 %52, label %bb10, label %bb23 - -bb10: ; preds = %bb9 - %tmp39 = add i32 %cur_match_addr.0, 10 ; <i32> [#uses=1] - %tmp41 = add i32 %cur_match_addr.0, 9 ; <i32> [#uses=1] - %tmp44 = add i32 %cur_match_addr.0, 8 ; <i32> [#uses=1] - %tmp47 = add i32 %cur_match_addr.0, 7 ; <i32> [#uses=1] - %tmp50 = add i32 %cur_match_addr.0, 6 ; <i32> [#uses=1] - %tmp53 = add i32 %cur_match_addr.0, 5 ; <i32> [#uses=1] - %tmp56 = add i32 %cur_match_addr.0, 4 ; <i32> [#uses=1] - %tmp59 = add i32 %cur_match_addr.0, 3 ; <i32> [#uses=1] - br label %bb11 - -bb11: ; preds = %bb18, %bb10 - %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2] - %tmp = shl i32 %indvar, 3 ; <i32> [#uses=16] - %tmp40 = add i32 %tmp39, %tmp ; <i32> [#uses=1] - %scevgep = getelementptr i8* %3, i32 %tmp40 ; <i8*> [#uses=1] - %tmp42 = add i32 %tmp41, %tmp ; <i32> [#uses=1] - %scevgep43 = getelementptr i8* %3, i32 %tmp42 ; <i8*> [#uses=1] - %tmp45 = add i32 %tmp44, %tmp ; <i32> [#uses=1] - %scevgep46 = getelementptr i8* %3, i32 %tmp45 ; <i8*> [#uses=1] - %tmp48 = add i32 %tmp47, %tmp ; <i32> [#uses=1] - %scevgep49 = getelementptr i8* %3, i32 %tmp48 ; <i8*> [#uses=1] - %tmp51 = add i32 %tmp50, %tmp ; <i32> [#uses=1] - %scevgep52 = getelementptr i8* %3, i32 %tmp51 ; 
<i8*> [#uses=1] - %tmp54 = add i32 %tmp53, %tmp ; <i32> [#uses=1] - %scevgep55 = getelementptr i8* %3, i32 %tmp54 ; <i8*> [#uses=1] - %tmp60 = add i32 %tmp59, %tmp ; <i32> [#uses=1] - %scevgep61 = getelementptr i8* %3, i32 %tmp60 ; <i8*> [#uses=1] - %tmp62 = add i32 %tmp, 10 ; <i32> [#uses=1] - %.sum89 = add i32 %5, %tmp62 ; <i32> [#uses=2] - %scevgep63 = getelementptr i8* %3, i32 %.sum89 ; <i8*> [#uses=2] - %tmp64 = add i32 %tmp, 9 ; <i32> [#uses=1] - %.sum90 = add i32 %5, %tmp64 ; <i32> [#uses=1] - %scevgep65 = getelementptr i8* %3, i32 %.sum90 ; <i8*> [#uses=2] - %tmp66 = add i32 %tmp, 8 ; <i32> [#uses=1] - %.sum91 = add i32 %5, %tmp66 ; <i32> [#uses=1] - %scevgep67 = getelementptr i8* %3, i32 %.sum91 ; <i8*> [#uses=2] - %tmp6883 = or i32 %tmp, 7 ; <i32> [#uses=1] - %.sum92 = add i32 %5, %tmp6883 ; <i32> [#uses=1] - %scevgep69 = getelementptr i8* %3, i32 %.sum92 ; <i8*> [#uses=2] - %tmp7084 = or i32 %tmp, 6 ; <i32> [#uses=1] - %.sum93 = add i32 %5, %tmp7084 ; <i32> [#uses=1] - %scevgep71 = getelementptr i8* %3, i32 %.sum93 ; <i8*> [#uses=2] - %tmp7285 = or i32 %tmp, 5 ; <i32> [#uses=1] - %.sum94 = add i32 %5, %tmp7285 ; <i32> [#uses=1] - %scevgep73 = getelementptr i8* %3, i32 %.sum94 ; <i8*> [#uses=2] - %tmp7486 = or i32 %tmp, 4 ; <i32> [#uses=1] - %.sum95 = add i32 %5, %tmp7486 ; <i32> [#uses=1] - %scevgep75 = getelementptr i8* %3, i32 %.sum95 ; <i8*> [#uses=2] - %tmp7687 = or i32 %tmp, 3 ; <i32> [#uses=1] - %.sum96 = add i32 %5, %tmp7687 ; <i32> [#uses=1] - %scevgep77 = getelementptr i8* %3, i32 %.sum96 ; <i8*> [#uses=2] - %53 = load i8* %scevgep77, align 1 ; <i8> [#uses=1] - %54 = load i8* %scevgep61, align 1 ; <i8> [#uses=1] - %55 = icmp eq i8 %53, %54 ; <i1> [#uses=1] - br i1 %55, label %bb12, label %bb20 - -bb12: ; preds = %bb11 - %tmp57 = add i32 %tmp56, %tmp ; <i32> [#uses=1] - %scevgep58 = getelementptr i8* %3, i32 %tmp57 ; <i8*> [#uses=1] - %56 = load i8* %scevgep75, align 1 ; <i8> [#uses=1] - %57 = load i8* %scevgep58, align 1 ; <i8> [#uses=1] - %58 = icmp eq i8 %56, %57 ; <i1> [#uses=1] - br i1 %58, label %bb13, label %bb20 - -bb13: ; preds = %bb12 - %59 = load i8* %scevgep73, align 1 ; <i8> [#uses=1] - %60 = load i8* %scevgep55, align 1 ; <i8> [#uses=1] - %61 = icmp eq i8 %59, %60 ; <i1> [#uses=1] - br i1 %61, label %bb14, label %bb20 - -bb14: ; preds = %bb13 - %62 = load i8* %scevgep71, align 1 ; <i8> [#uses=1] - %63 = load i8* %scevgep52, align 1 ; <i8> [#uses=1] - %64 = icmp eq i8 %62, %63 ; <i1> [#uses=1] - br i1 %64, label %bb15, label %bb20 - -bb15: ; preds = %bb14 - %65 = load i8* %scevgep69, align 1 ; <i8> [#uses=1] - %66 = load i8* %scevgep49, align 1 ; <i8> [#uses=1] - %67 = icmp eq i8 %65, %66 ; <i1> [#uses=1] - br i1 %67, label %bb16, label %bb20 - -bb16: ; preds = %bb15 - %68 = load i8* %scevgep67, align 1 ; <i8> [#uses=1] - %69 = load i8* %scevgep46, align 1 ; <i8> [#uses=1] - %70 = icmp eq i8 %68, %69 ; <i1> [#uses=1] - br i1 %70, label %bb17, label %bb20 - -bb17: ; preds = %bb16 - %71 = load i8* %scevgep65, align 1 ; <i8> [#uses=1] - %72 = load i8* %scevgep43, align 1 ; <i8> [#uses=1] - %73 = icmp eq i8 %71, %72 ; <i1> [#uses=1] - br i1 %73, label %bb18, label %bb20 - -bb18: ; preds = %bb17 - %74 = load i8* %scevgep63, align 1 ; <i8> [#uses=1] - %75 = load i8* %scevgep, align 1 ; <i8> [#uses=1] - %76 = icmp eq i8 %74, %75 ; <i1> [#uses=1] - %77 = icmp slt i32 %.sum89, %.sum ; <i1> [#uses=1] - %or.cond = and i1 %76, %77 ; <i1> [#uses=1] - %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] - br i1 %or.cond, label %bb11, label %bb20 - -bb20: ; preds = %bb18, 
%bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11 - %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1] - %78 = ptrtoint i8* %scan.3 to i32 ; <i32> [#uses=1] - %79 = sub nsw i32 %78, %35 ; <i32> [#uses=2] - %80 = add i32 %79, 258 ; <i32> [#uses=5] - %81 = icmp sgt i32 %80, %best_len.2 ; <i1> [#uses=1] - br i1 %81, label %bb21, label %bb23 - -bb21: ; preds = %bb20 - store i32 %cur_match_addr.0, i32* %34, align 4 - %82 = icmp slt i32 %80, %nice_match.0.ph ; <i1> [#uses=1] - br i1 %82, label %bb22, label %bb25 - -bb22: ; preds = %bb21 - %.sum37 = add i32 %36, %79 ; <i32> [#uses=1] - %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1] - %84 = load i8* %83, align 1 ; <i8> [#uses=1] - %.sum38 = add i32 %80, %5 ; <i32> [#uses=1] - %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1] - %86 = load i8* %85, align 1 ; <i8> [#uses=1] - br label %bb23 - -bb23: ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6 - %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3] - %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1] - %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1] - %87 = and i32 %cur_match_addr.0, %20 ; <i32> [#uses=1] - %88 = getelementptr inbounds i16* %18, i32 %87 ; <i16*> [#uses=1] - %89 = load i16* %88, align 2 ; <i16> [#uses=1] - %90 = zext i16 %89 to i32 ; <i32> [#uses=2] - %91 = icmp ugt i32 %90, %iftmp.48.0 ; <i1> [#uses=1] - br i1 %91, label %bb24, label %bb25 - -bb24: ; preds = %bb23 - -; LSR should use count-down iteration to avoid requiring the trip count -; in a register. 
- -; CHECK: @ %bb24 -; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1 -; CHECK: bne.w - - %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1] - %indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1] - br i1 %92, label %bb25, label %bb6 - -bb25: ; preds = %bb24, %bb23, %bb21 - %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2] - %93 = icmp ugt i32 %best_len.1, %32 ; <i1> [#uses=1] - %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1] - ret i32 %merge -} diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index bf26a96..5b4cf9d 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -12,7 +12,7 @@ ; CHECK: add target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.partition_entry = type { i32, i32, i64, i64 } diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index c77402f..3ac7d77 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ;rdar://8003725 @G1 = external global i32 @@ -6,13 +6,42 @@ define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { entry: +; CHECK: f1: ; CHECK: cmp ; CHECK: moveq ; CHECK-NOT: cmp -; CHECK: moveq +; CHECK: mov{{eq|ne}} %tmp1 = icmp eq i32 %cond1, 0 %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2 %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3 %tmp4 = add i32 %tmp2, %tmp3 ret i32 %tmp4 } + +@foo = external global i32 +@bar = external global [250 x i8], align 1 + +; CSE of cmp across BB boundary +; rdar://10660865 +define void @f2() nounwind ssp { +entry: +; CHECK: f2: +; CHECK: cmp +; CHECK: poplt +; CHECK-NOT: cmp +; CHECK: movle + %0 = load i32* @foo, align 4 + %cmp28 = icmp sgt i32 %0, 0 + br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader + +for.body.lr.ph: ; preds = %entry + %1 = icmp sgt i32 %0, 1 + %smax = select i1 %1, i32 %0, i32 1 + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + unreachable + +for.cond1.preheader: ; preds = %entry + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index 30b9f59..dc77282 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s -; The ARM magic hinting works best with linear scan. 
; CHECK: ldrd ; CHECK: strd ; CHECK: ldrb diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index aeda022..fe0056c 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s @from = common global [500 x i32] zeroinitializer, align 4 @@ -18,6 +19,8 @@ entry: ; EABI memset swaps arguments ; CHECK: mov r1, #0 ; CHECK: memset + ; DARWIN: movs r1, #0 + ; DARWIN: memset ; EABI: mov r2, #0 ; EABI: __aeabi_memset call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false) diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll index 3cb8a8e..c50a233 100644 --- a/test/CodeGen/ARM/mul_const.ll +++ b/test/CodeGen/ARM/mul_const.ll @@ -41,3 +41,45 @@ entry: ret i32 %0 } +define i32 @tn9(i32 %v) nounwind readnone { +entry: +; CHECK: tn9: +; CHECK: add r0, r0, r0, lsl #3 +; CHECK: rsb r0, r0, #0 + %0 = mul i32 %v, -9 + ret i32 %0 +} + +define i32 @tn7(i32 %v) nounwind readnone { +entry: +; CHECK: tn7: +; CHECK: sub r0, r0, r0, lsl #3 + %0 = mul i32 %v, -7 + ret i32 %0 +} + +define i32 @tn5(i32 %v) nounwind readnone { +entry: +; CHECK: tn5: +; CHECK: add r0, r0, r0, lsl #2 +; CHECK: rsb r0, r0, #0 + %0 = mul i32 %v, -5 + ret i32 %0 +} + +define i32 @tn3(i32 %v) nounwind readnone { +entry: +; CHECK: tn3: +; CHECK: sub r0, r0, r0, lsl #2 + %0 = mul i32 %v, -3 + ret i32 %0 +} + +define i32 @tn12288(i32 %v) nounwind readnone { +entry: +; CHECK: tn12288: +; CHECK: sub r0, r0, r0, lsl #2 +; CHECK: lsl{{.*}}#12 + %0 = mul i32 %v, -12288 + ret i32 %0 +} diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll index c78872a..b892d2d 100644 --- a/test/CodeGen/ARM/neon_ld1.ll +++ b/test/CodeGen/ARM/neon_ld1.ll @@ -1,7 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4 -; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64 -; RUN: llc < %s -march=arm -mattr=+neon | grep vmov +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; CHECK: t1 +; CHECK: vldr d +; CHECK: vldr d +; CHECK: vadd.i16 d +; CHECK: vstr d define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { entry: %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] @@ -12,6 +15,11 @@ entry: ret void } +; CHECK: t2 +; CHECK: vldr d +; CHECK: vldr d +; CHECK: vsub.i16 d +; CHECK: vmov r0, r1, d define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly { entry: %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 130277b..944bfe0 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -1,7 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4 -; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1 -; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; CHECK: t1 +; CHECK: vldmia +; CHECK: vldmia +; CHECK: vadd.i64 q +; CHECK: vstmia define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] @@ -12,6 +15,12 @@ entry: ret void } +; CHECK: t2 +; CHECK: vldmia +; CHECK: vldmia +; CHECK: vsub.i64 q +; CHECK: vmov r0, r1, d +; CHECK: vmov r2, r3, d define <4 x i32> @t2(<2 x i64>* 
%a, <2 x i64>* %b) nounwind readonly { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll new file mode 100644 index 0000000..277bd05 --- /dev/null +++ b/test/CodeGen/ARM/neon_spill.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -verify-machineinstrs +; RUN: llc < %s -verify-machineinstrs -O0 +; PR12177 +; +; This test case spills a QQQQ register. +; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { %1*, i32, i32, i32, i8 } +%1 = type { i32 (...)** } +%2 = type { i8*, i8*, i8*, i32 } +%3 = type { %4 } +%4 = type { i32 (...)**, %2, %4*, i8, i8 } + +declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind + +declare arm_aapcs_vfpcc %0** @func2() + +declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32) + +declare arm_aapcs_vfpcc %2** @func4() + +define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + %2 = call arm_aapcs_vfpcc %0** @func2() nounwind + %3 = load %0** %2, align 4, !tbaa !0 + store float 0.000000e+00, float* undef, align 4 + %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind + call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef) + %5 = call arm_aapcs_vfpcc %0** @func2() nounwind + store float 1.000000e+00, float* undef, align 4 + call arm_aapcs_vfpcc void @func1(%0* undef, float* undef, float* undef, %2* undef) + store float 1.500000e+01, float* undef, align 4 + %6 = call arm_aapcs_vfpcc %2** @func4() nounwind + %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2971) nounwind + %8 = fadd float undef, -1.000000e+05 + store float %8, float* undef, align 16, !tbaa !3 + %9 = call arm_aapcs_vfpcc i32 @rand() nounwind + %10 = fmul float undef, 2.000000e+05 + %11 = fadd float %10, -1.000000e+05 + store float %11, float* undef, align 4, !tbaa !3 + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + ret void +} + +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind + +declare arm_aapcs_vfpcc i32 @rand() + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"float", metadata !1} diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll new file mode 100644 index 0000000..e28b578 --- /dev/null +++ b/test/CodeGen/ARM/odr_comdat.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI + +; Checking that a comdat group gets generated correctly for a static member +; of instantiated C++ templates. 
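+; As a rough illustration (the class and member names here are inferred from
+; the mangled names _ZN1CIiE1iE and _ZN1CIiE1jE below, so this is a sketch,
+; not the verified original source), C++ code along these lines produces the
+; two symbols being checked:
+;
+;   template <typename T> struct C { static T i; static T j; };
+;   template <typename T> T C<T>::i;       // uninitialized -> .bss comdat
+;   template <typename T> T C<T>::j = 12;  // initialized   -> .data comdat
+;   template struct C<int>;
+;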
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled
+; name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll
new file mode 100644
index 0000000..b4da552
--- /dev/null
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+
+; CHECK: func_4_8
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
+  %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
+  store <4 x i8> %r, <4 x i8>* %p
+  ret void
+}
+
+; CHECK: func_2_16
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
+  %r = add <2 x i16> %param, <i16 1, i16 2>
+  store <2 x i16> %r, <2 x i16>* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll
index e670a5b..e72d51f 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; XFAIL: *
+; PR11364
 
 ; vmov s0, r0 + vmov r0, s0 should have been optimized away.
 ; rdar://9104514
diff --git a/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 0000000..d1d0ee5
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very simple case.
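+;
+; As a rough sketch (reconstructed from the mangled name _Z10CopyStructP3FooS0_,
+; i.e. CopyStruct(Foo*, Foo*), so illustrative rather than the verified source),
+; the test below corresponds to a plain 16-byte aggregate copy in C++:
+;
+;   struct Foo { int a, b, c, d; };
+;   void CopyStruct(Foo *a, Foo *b) { *a = *b; } // copy lowers to memcpy, then ldm/stm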
+ +%struct.Foo = type { i32, i32, i32, i32 } + +define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind { +entry: +;CHECK: ldm +;CHECK: stm + %0 = bitcast %struct.Foo* %a to i8* + %1 = bitcast %struct.Foo* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 3a19211..05794e4 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -155,7 +155,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: t6: -; CHECK: vldr.64 +; CHECK: vldr ; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]] ; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] @@ -240,7 +240,7 @@ bb14: ; preds = %bb6 ; PR7157 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: t9: -; CHECK: vldr.64 +; CHECK: vldr ; CHECK-NOT: vmov d{{.*}}, d16 ; CHECK: vmov.i32 d17 ; CHECK-NEXT: vstmia r0, {d16, d17} @@ -272,8 +272,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: -; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000 -; CHECK: vmul.f32 q8, q8, d0[0] +; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000 +; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]] ; CHECK: vadd.f32 q8, q8, q8 %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index ea44c28..6bb6743 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -112,11 +112,11 @@ entry: ret i32 %conv3 } +; rdar://10750814 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone { entry: ; CHECK: test9 -; CHECK: rev r0, r0 -; CHECK: lsr r0, r0, #16 +; CHECK: rev16 r0, r0 %conv = zext i16 %v to i32 %shr4 = lshr i32 %conv, 8 %shl = shl nuw nsw i32 %conv, 8 diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll index f43dde5..c9ac66a 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -64,14 +64,14 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind { entry: ; ARM: t4: ; ARM: ldr -; ARM: movlt +; ARM: mov{{lt|ge}} ; ARMT2: t4: ; ARMT2: movwlt [[R0:r[0-9]+]], #65365 ; ARMT2: movtlt [[R0]], #65365 ; THUMB2: t4: -; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290 +; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290 %0 = icmp slt i32 %a, %b %1 = select i1 %0, i32 4283826005, i32 %x ret i32 %1 diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index f1bd7ee..3e07da8 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -76,12 +76,12 @@ define double @f7(double %a, double %b) { ; block generated, odds are good that we have close to the ideal code for this: ; ; CHECK-NEON: _f8: -; CHECK-NEON: adr r2, LCPI7_0 -; CHECK-NEON-NEXT: movw r3, #1123 -; CHECK-NEON-NEXT: adds r1, r2, #4 -; CHECK-NEON-NEXT: cmp r0, r3 +; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 +; CHECK-NEON-NEXT: movw [[R3:r[0-9]+]], #1123 +; CHECK-NEON-NEXT: adds {{r.*}}, [[R2]], #4 +; CHECK-NEON-NEXT: cmp r0, [[R3]] ; CHECK-NEON-NEXT: it ne -; CHECK-NEON-NEXT: movne r1, r2 +; CHECK-NEON-NEXT: movne {{r.*}}, [[R2]] ; 
CHECK-NEON-NEXT: ldr ; CHECK-NEON: bx diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 8a3133a..ca2e18a 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { %s = or i32 %z, %y ret i32 %s } + +define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t5: +; ARM-NOT: moveq +; ARM: orreq r2, r2, #1 + +; T2: t5: +; T2-NOT: moveq +; T2: orreq r2, r2, #1 + %tmp1 = icmp eq i32 %a, %b + %tmp2 = zext i1 %tmp1 to i32 + %tmp3 = or i32 %tmp2, %c + ret i32 %tmp3 +} + +define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t6: +; ARM-NOT: movge +; ARM: eorlt r3, r3, r2 + +; T2: t6: +; T2-NOT: movge +; T2: eorlt.w r3, r3, r2 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %c, i32 0 + %tmp2 = xor i32 %tmp1, %d + ret i32 %tmp2 +} + +define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t7: +; ARM-NOT: lsleq +; ARM: andeq r2, r2, r2, lsl #1 + +; T2: t7: +; T2-NOT: lsleq.w +; T2: andeq.w r2, r2, r2, lsl #1 + %tmp1 = shl i32 %c, 1 + %cond = icmp eq i32 %a, %b + %tmp2 = select i1 %cond, i32 %tmp1, i32 -1 + %tmp3 = and i32 %c, %tmp2 + ret i32 %tmp3 +} + diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 964cef0..eb971ff 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -55,11 +55,15 @@ define fastcc void @test4(i16 %addr) nounwind { entry: ; A8: test4: ; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] +; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! ; A8: str [[REG]], [r0, r1, lsl #2] +; A8-NOT: str [[REG]], [r0] ; A9: test4: ; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] +; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! ; A9: str [[REG]], [r0, r1, lsl #2] +; A9-NOT: str [[REG]], [r0] %0 = tail call i8* (...)* @malloc(i32 undef) nounwind %1 = bitcast i8* %0 to i32* %2 = sext i16 %addr to i32 diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index bf4e55c..057ea11 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: -; CHECK: bic sp, sp, #15 +; CHECK: bic {{.*}}, #15 ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index f4e3a44..983ba45 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s ; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s ; The greedy register allocator uses a single CSR here, invalidating the test. 
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index 993d7ec..03ae12c 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -12,13 +12,13 @@ target triple = "thumbv7-apple-ios" ; ; CHECK: f1 ; CHECK: vmov s1, r0 -; CHECK: vldr.32 s0, LCPI +; CHECK: vldr s0, LCPI ; The vector must be spilled: -; CHECK: vstr.64 d0, +; CHECK: vstr d0, ; CHECK: asm clobber d0 ; And reloaded after the asm: -; CHECK: vldr.64 [[D16:d[0-9]+]], -; CHECK: vstr.64 [[D16]], [r1] +; CHECK: vldr [[D16:d[0-9]+]], +; CHECK: vstr [[D16]], [r1] define void @f1(float %x, <2 x float>* %p) { %v1 = insertelement <2 x float> undef, float %x, i32 1 %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0 @@ -37,13 +37,13 @@ define void @f1(float %x, <2 x float>* %p) { ; virtual register. It doesn't read the old value. ; ; CHECK: f2 -; CHECK: vldr.32 s0, LCPI +; CHECK: vldr s0, LCPI ; The vector must not be spilled: -; CHECK-NOT: vstr.64 +; CHECK-NOT: vstr ; CHECK: asm clobber d0 ; But instead rematerialize after the asm: -; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI -; CHECK: vstr.64 [[D0:d[0-9]+]], [r0] +; CHECK: vldr [[S0:s[0-9]+]], LCPI +; CHECK: vstr [[D0:d[0-9]+]], [r0] define void @f2(<2 x float>* %p) { %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0 %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll new file mode 100644 index 0000000..e015bf0 --- /dev/null +++ b/test/CodeGen/ARM/tail-dup.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s + +; We should be able to tail-duplicate the basic block containing the indirectbr +; into all of its predecessors. 
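+;
+; A C++ sketch of the kind of source this IR models (inferred from the
+; blockaddress table below, so treat the names as illustrative): a
+; direct-threaded interpreter dispatching opcodes with GCC's computed-goto
+; extension, where each handler jumps straight to the next handler.
+;
+;   int fn(const int *opcodes) {
+;     static const void *codetable[] = { &&RETURN, &&INCREMENT, &&DECREMENT };
+;     int result = 0;
+;     goto *codetable[*opcodes++];
+;   INCREMENT: ++result; goto *codetable[*opcodes++];
+;   DECREMENT: --result; goto *codetable[*opcodes++];
+;   RETURN:    return result;
+;   }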
+; CHECK: fn: +; CHECK: mov pc +; CHECK: mov pc +; CHECK: mov pc + +@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4 + +define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { +entry: + %0 = load i32* %opcodes, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0 + br label %indirectgoto + +INCREMENT: ; preds = %indirectgoto + %inc = add nsw i32 %result.0, 1 + %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1 + br label %indirectgoto + +DECREMENT: ; preds = %indirectgoto + %dec = add nsw i32 %result.0, -1 + %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2 + br label %indirectgoto + +indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry + %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ] + %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ] + %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ] + %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1 + %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4 + indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT] + +RETURN: ; preds = %indirectgoto + ret i32 %result.0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll new file mode 100644 index 0000000..93340c3 --- /dev/null +++ b/test/CodeGen/ARM/test-sharedidx.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s +; REQUIRES: asserts + +; @sharedidx is an unrolled variant of this loop: +; for (unsigned long i = 0; i < len; i += s) { +; c[i] = a[i] + b[i]; +; } +; where 's' cannot be folded into the addressing mode. +; +; This is not quite profitable to chain. But with -stress-ivchain, we +; can form three address chains in place of the shared induction +; variable. + +; rdar://10674430 +define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { +entry: +; CHECK: sharedidx: + %cmp8 = icmp eq i32 %len, 0 + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body.3 +; CHECK: %for.body +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
+ %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8* %a, i32 %i.09 + %0 = load i8* %arrayidx, align 1 + %conv6 = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09 + %1 = load i8* %arrayidx1, align 1 + %conv27 = zext i8 %1 to i32 + %add = add nsw i32 %conv27, %conv6 + %conv3 = trunc i32 %add to i8 + %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09 + store i8 %conv3, i8* %arrayidx4, align 1 + %add5 = add i32 %i.09, %s + %cmp = icmp ult i32 %add5, %len + br i1 %cmp, label %for.body.1, label %for.end + +for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry + ret void + +for.body.1: ; preds = %for.body +; CHECK: %for.body.1 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5 + %2 = load i8* %arrayidx.1, align 1 + %conv6.1 = zext i8 %2 to i32 + %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5 + %3 = load i8* %arrayidx1.1, align 1 + %conv27.1 = zext i8 %3 to i32 + %add.1 = add nsw i32 %conv27.1, %conv6.1 + %conv3.1 = trunc i32 %add.1 to i8 + %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5 + store i8 %conv3.1, i8* %arrayidx4.1, align 1 + %add5.1 = add i32 %add5, %s + %cmp.1 = icmp ult i32 %add5.1, %len + br i1 %cmp.1, label %for.body.2, label %for.end + +for.body.2: ; preds = %for.body.1 +; CHECK: %for.body.2 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1 + %4 = load i8* %arrayidx.2, align 1 + %conv6.2 = zext i8 %4 to i32 + %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1 + %5 = load i8* %arrayidx1.2, align 1 + %conv27.2 = zext i8 %5 to i32 + %add.2 = add nsw i32 %conv27.2, %conv6.2 + %conv3.2 = trunc i32 %add.2 to i8 + %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1 + store i8 %conv3.2, i8* %arrayidx4.2, align 1 + %add5.2 = add i32 %add5.1, %s + %cmp.2 = icmp ult i32 %add5.2, %len + br i1 %cmp.2, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.body.2 +; CHECK: %for.body.3 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2 + %6 = load i8* %arrayidx.3, align 1 + %conv6.3 = zext i8 %6 to i32 + %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2 + %7 = load i8* %arrayidx1.3, align 1 + %conv27.3 = zext i8 %7 to i32 + %add.3 = add nsw i32 %conv27.3, %conv6.3 + %conv3.3 = trunc i32 %add.3 to i8 + %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2 + store i8 %conv3.3, i8* %arrayidx4.3, align 1 + %add5.3 = add i32 %add5.2, %s + %cmp.3 = icmp ult i32 %add5.3, %len + br i1 %cmp.3, label %for.body, label %for.end +} diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll index 14e668e..f157dbd 100644 --- a/test/CodeGen/ARM/vbsl-constant.ll +++ b/test/CodeGen/ARM/vbsl-constant.ll @@ -2,8 +2,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: v_bsli8: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B @@ -16,8 +16,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK: v_bsli16: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B @@ -30,8 +30,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK: v_bsli32: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B @@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { ;CHECK: v_bsli64: -;CHECK: vldr.64 -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index 1387393..7fddbed 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>) ; Test signed conversion. ; CHECK: t1 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t1() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>) ; Test unsigned conversion. ; CHECK: t2 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t2() nounwind { entry: %tmp = load i32* @uin, align 4, !tbaa !3 @@ -38,7 +38,7 @@ entry: ; Test which should not fold due to non-power of 2. ; CHECK: t3 -; CHECK: vdiv +; CHECK: {{vdiv|vmul}} define void @t3() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -52,7 +52,7 @@ entry: ; Test which should not fold due to power of 2 out of range. ; CHECK: t4 -; CHECK: vdiv +; CHECK: {{vdiv|vmul}} define void @t4() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -66,7 +66,7 @@ entry: ; Test case where const is max power of 2 (i.e., 2^32). ; CHECK: t5 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t5() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -80,7 +80,7 @@ entry: ; Test quadword. 
; CHECK: t6 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t6() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index e99fac1..05332e4 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -254,7 +254,7 @@ entry: ;CHECK: redundantVdup: ;CHECK: vmov.i8 ;CHECK-NOT: vdup.8 -;CHECK: vstr.64 +;CHECK: vstr define void @redundantVdup(<8 x i8>* %ptr) nounwind { %1 = insertelement <8 x i8> undef, i8 -128, i32 0 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 81bdc44..a38a0fe 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -80,7 +80,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind ; so they are not split up into i32 values. Radar 8755338. define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_buildvector -; CHECK: vldr.64 +; CHECK: vldr %t0 = load i64* %ptr, align 4 %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0 store <2 x i64> %t1, <2 x i64>* %vp @@ -89,7 +89,7 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_insertelement -; CHECK: vldr.64 +; CHECK: vldr %t0 = load i64* %ptr, align 4 %vec = load <2 x i64>* %vp %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0 @@ -99,7 +99,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_extractelement -; CHECK: vstr.64 +; CHECK: vstr %vec = load <2 x i64>* %vp %t1 = extractelement <2 x i64> %vec, i32 0 store i64 %t1, i64* %ptr @@ -123,3 +123,13 @@ define void @orVec(<3 x i8>* %A) nounwind { ret void } +; The following test was hitting an assertion in the DAG combiner when +; constant folding the multiply because the "sext undef" was translated to +; a BUILD_VECTOR with i32 0 operands, which did not match the i16 operands +; of the other BUILD_VECTOR. 
+define i16 @foldBuildVectors() { + %1 = sext <8 x i8> undef to <8 x i16> + %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> + %3 = extractelement <8 x i16> %2, i32 0 + ret i16 %3 +} diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll new file mode 100644 index 0000000..5e9239f --- /dev/null +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -0,0 +1,46 @@ +; RUN: llc -mtriple armv7 %s -o - | FileCheck %s + +; CHECK: f: +define float @f(<4 x i16>* nocapture %in) { + ; CHECK: vldr + ; CHECK: vmovl.u16 + %1 = load <4 x i16>* %in + ; CHECK: vcvt.f32.u32 + %2 = uitofp <4 x i16> %1 to <4 x float> + %3 = extractelement <4 x float> %2, i32 0 + %4 = extractelement <4 x float> %2, i32 1 + %5 = extractelement <4 x float> %2, i32 2 + + ; CHECK: vadd.f32 + %6 = fadd float %3, %4 + %7 = fadd float %6, %5 + + ret float %7 +} + +; CHECK: g: +define float @g(<4 x i8>* nocapture %in) { + ; CHECK: vldr + ; CHECK: vmovl.u8 + ; CHECK: vmovl.u16 + %1 = load <4 x i8>* %in + ; CHECK: vcvt.f32.u32 + %2 = uitofp <4 x i8> %1 to <4 x float> + %3 = extractelement <4 x float> %2, i32 0 + %4 = extractelement <4 x float> %2, i32 1 + %5 = extractelement <4 x float> %2, i32 2 + + ; CHECK: vadd.f32 + %6 = fadd float %3, %4 + %7 = fadd float %6, %5 + + ret float %7 +} + +; CHECK: h: +define <4 x i8> @h(<4 x float> %v) { + ; CHECK: vcvt.{{[us]}}32.f32 + ; CHECK: vmovn.i32 + %1 = fptoui <4 x float> %v to <4 x i8> + ret <4 x i8> %1 +} diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 65b5913..e224bdf 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -138,7 +138,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; Make sure this doesn't crash define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind { ; CHECK: test_elem_mismatch: -; CHECK: vstr.64 +; CHECK: vstr %tmp0 = load <2 x i64>* %src, align 16 %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index d0e9ac3..61d73c1 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind { define <2 x float> @vld1dupf(float* %A) nounwind { ;CHECK: vld1dupf: -;CHECK: vld1.32 {d16[]}, [r0] +;CHECK: vld1.32 {d16[]}, [r0, :32] %tmp0 = load float* %A %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer @@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind { define <4 x float> @vld1dupQf(float* %A) nounwind { ;CHECK: vld1dupQf: -;CHECK: vld1.32 {d16[], d17[]}, [r0] +;CHECK: vld1.32 {d16[], d17[]}, [r0, :32] %tmp0 = load float* %A %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 0d7d4ec..7bd0cbd 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -31,9 +31,19 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp3 } +define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld1lanei32a32: +;Check the alignment value. Legal values are none or :32. 
+;CHECK: vld1.32 {d16[1]}, [r0, :32] + %tmp1 = load <2 x i32>* %B + %tmp2 = load i32* %A, align 4 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 + ret <2 x i32> %tmp3 +} + define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld1lanef: -;CHECK: vld1.32 {d16[1]}, [r0] +;CHECK: vld1.32 {d16[1]}, [r0, :32] %tmp1 = load <2 x float>* %B %tmp2 = load float* %A, align 4 %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 @@ -69,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld1laneQf: -;CHECK: vld1.32 {d16[0]}, [r0] +;CHECK: vld1.32 {d16[0]}, [r0, :32] %tmp1 = load <4 x float>* %B %tmp2 = load float* %A %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index a86be32b..0c23879 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -56,13 +56,13 @@ define <2 x i32> @v_movi32d() nounwind { define <2 x i32> @v_movi32e() nounwind { ;CHECK: v_movi32e: -;CHECK: vmov.i32 d{{.*}}, #0x20FF +;CHECK: vmov.i32 d{{.*}}, #0x20ff ret <2 x i32> < i32 8447, i32 8447 > } define <2 x i32> @v_movi32f() nounwind { ;CHECK: v_movi32f: -;CHECK: vmov.i32 d{{.*}}, #0x20FFFF +;CHECK: vmov.i32 d{{.*}}, #0x20ffff ret <2 x i32> < i32 2162687, i32 2162687 > } @@ -92,19 +92,19 @@ define <2 x i32> @v_mvni32d() nounwind { define <2 x i32> @v_mvni32e() nounwind { ;CHECK: v_mvni32e: -;CHECK: vmvn.i32 d{{.*}}, #0x20FF +;CHECK: vmvn.i32 d{{.*}}, #0x20ff ret <2 x i32> < i32 4294958848, i32 4294958848 > } define <2 x i32> @v_mvni32f() nounwind { ;CHECK: v_mvni32f: -;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF +;CHECK: vmvn.i32 d{{.*}}, #0x20ffff ret <2 x i32> < i32 4292804608, i32 4292804608 > } define <1 x i64> @v_movi64() nounwind { ;CHECK: v_movi64: -;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF +;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff ret <1 x i64> < i64 18374687574888349695 > } @@ -152,19 +152,19 @@ define <4 x i32> @v_movQi32d() nounwind { define <4 x i32> @v_movQi32e() nounwind { ;CHECK: v_movQi32e: -;CHECK: vmov.i32 q{{.*}}, #0x20FF +;CHECK: vmov.i32 q{{.*}}, #0x20ff ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 > } define <4 x i32> @v_movQi32f() nounwind { ;CHECK: v_movQi32f: -;CHECK: vmov.i32 q{{.*}}, #0x20FFFF +;CHECK: vmov.i32 q{{.*}}, #0x20ffff ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 > } define <2 x i64> @v_movQi64() nounwind { ;CHECK: v_movQi64: -;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF +;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } @@ -182,7 +182,7 @@ entry: define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { entry: ;CHECK: vdupnneg75: -;CHECK: vmov.i8 d{{.*}}, #0xB5 +;CHECK: vmov.i8 d{{.*}}, #0xb5 %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8 ret void @@ -353,3 +353,48 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind { store <4 x i16> %tmp2, <4 x i16>* %b, align 8 ret void } + +; Use vmov.f32 to materialize f32 immediate splats +; rdar://10437054 +define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind { +entry: +;CHECK: v_mov_v2f32: +;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01 + store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4 + 
+  ret void
+}
+
+define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32:
+;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
+  store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
+  ret void
+}
+
+define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32_undef:
+;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
+  %a = load <4 x float> *%p
+  %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
+  store <4 x float> %b, <4 x float> *%p
+  ret void
+}
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1780d6e..61d89bb 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -514,3 +514,14 @@ entry:
  store <8 x i8> %10, <8 x i8>* %11, align 8
  ret void
}
+
+; If one operand has a zero-extend and the other a sign-extend, vmull
+; cannot be used.
+define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
+; CHECK: vmullWithInconsistentExtensions
+; CHECK-NOT: vmull.s8
+  %1 = sext <8 x i8> %vec to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 34acd16..122ec03 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -148,11 +148,11 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
  ret void
}

-; vrev <4 x i16> should use VREV32 and not VREV64
+; The type <2 x i16> is legalized to <2 x i32> and needs to be trunc-stored
+; to <2 x i16> when stored to memory.
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
+; CHECK: vst1.32
entry:
  %0 = bitcast <4 x i16>* %source to <8 x i16>*
  %tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
  ret void
}

+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+  %tmp1 = load <4 x i16>* %B
+  tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+  %t5 = getelementptr inbounds i8* %out, i32 16
+  ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+  %tmp1 = load <4 x float>* %this
+  call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+  %tmp2 = getelementptr inbounds i8* %out, i32 32
+  ret i8* %tmp2
+}
+
declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 08b7232..758b355 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {

define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x float>* %B
  %tmp2 = extractelement <2 x float> %tmp1, i32 1
  store float %tmp2, float* %A
@@ -358,6 +358,13 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
  ret void
}

+; Make sure this doesn't crash; PR10258
+define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
+;CHECK: variable_insertelement:
+  %r = insertelement <8 x i16> %a, i16 %b, i32 %c
+  ret <8 x i16> %r
+}
+
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 8fd99ba..2cffda3 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
target triple = "thumbv7-apple-ios"

-; The 0.0 constant is loaded from the constant pool and kept in a register.
+; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
; CHECK: %entry
-; CHECK: vldr.32 s
+; CHECK: vldr s
; The float loop variable is initialized with a vmovs from the constant register.
; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32.
; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]]
@@ -24,8 +24,8 @@ for.body4:
  br label %for.body.i

for.body.i:
-  %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
-  %add.i = fadd float %tmp3.i, 0.000000e+00
+  %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
+  %add.i = fadd float %tmp3.i, 1.000000e+10
  %exitcond.i = icmp eq i32 undef, 41
  br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
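
For contrast with the vmullWithInconsistentExtensions test added to vmul.ll above, here is a minimal sketch of the consistent case, where both multiplicands are sign-extended from <8 x i8> and the backend should be able to select vmull.s8. The function name and CHECK lines below are illustrative only, not part of this commit:

; Both operands carry matching sign-extensions, so the widening multiply
; can be selected as a single vmull.s8.
define <8 x i16> @vmullWithConsistentExtensions(<8 x i8> %a, <8 x i8> %b) {
; CHECK: vmullWithConsistentExtensions
; CHECK: vmull.s8
  %1 = sext <8 x i8> %a to <8 x i16>
  %2 = sext <8 x i8> %b to <8 x i16>
  %3 = mul <8 x i16> %1, %2
  ret <8 x i16> %3
}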