44 files changed, 513 insertions, 125 deletions
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index ee93fde..2b7ccd8 100644
--- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13
 
 	%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
 	%struct.VEC2 = type { double, double, double }
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 98cab9a..3909c6a 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
   br i1 %4, label %bb1, label %bb2
 
 bb1:
-;CHECK: fstdhi
+;CHECK: vstrhi.64
   store double %1, double* %y, align 4
   br label %bb2
 
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 6281775..5476d5f 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
   %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-; CHECK: fldd
+; CHECK: vldr.64
   %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
index c260b97..62f3786 100644
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
@@ -11,12 +11,15 @@ entry:
   %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
   store <4 x float> %quat.0, <4 x float>* %0
   %1 = call arm_aapcs_vfpcc  <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
+;CHECK: vmov.f32
+;CHECK: vmov.f32
   %2 = fmul <4 x float> %1, %1                    ; <<4 x float>> [#uses=2]
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
   %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
   %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
   %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
   %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
+;CHECK: vmov
   %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
   %9 = fmul <4 x float> %8, %8                    ; <<4 x float>> [#uses=1]
   %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
new file mode 100644
index 0000000..dd2845f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; PR5423
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo() {
+entry:
+  %0 = load float* null, align 4                  ; <float> [#uses=2]
+  %1 = fmul float %0, undef                       ; <float> [#uses=2]
+  %2 = fmul float 0.000000e+00, %1                ; <float> [#uses=2]
+  %3 = fmul float %0, %1                          ; <float> [#uses=1]
+  %4 = fadd float 0.000000e+00, %3                ; <float> [#uses=1]
+  %5 = fsub float 1.000000e+00, %4                ; <float> [#uses=1]
+; CHECK: foo:
+; CHECK: fconsts s{{[0-9]+}}, #112
+  %6 = fsub float 1.000000e+00, undef             ; <float> [#uses=2]
+  %7 = fsub float %2, undef                       ; <float> [#uses=1]
+  %8 = fsub float 0.000000e+00, undef             ; <float> [#uses=3]
+  %9 = fadd float %2, undef                       ; <float> [#uses=3]
+  %10 = load float* undef, align 8                ; <float> [#uses=3]
+  %11 = fmul float %8, %10                        ; <float> [#uses=1]
+  %12 = fadd float undef, %11                     ; <float> [#uses=2]
+  %13 = fmul float undef, undef                   ; <float> [#uses=1]
+  %14 = fmul float %6, 0.000000e+00               ; <float> [#uses=1]
+  %15 = fadd float %13, %14                       ; <float> [#uses=1]
+  %16 = fmul float %9, %10                        ; <float> [#uses=1]
+  %17 = fadd float %15, %16                       ; <float> [#uses=2]
+  %18 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %19 = fadd float %18, 0.000000e+00              ; <float> [#uses=1]
+  %20 = fmul float undef, %10                     ; <float> [#uses=1]
+  %21 = fadd float %19, %20                       ; <float> [#uses=1]
+  %22 = load float* undef, align 8                ; <float> [#uses=1]
+  %23 = fmul float %5, %22                        ; <float> [#uses=1]
+  %24 = fadd float %23, undef                     ; <float> [#uses=1]
+  %25 = load float* undef, align 8                ; <float> [#uses=2]
+  %26 = fmul float %8, %25                        ; <float> [#uses=1]
+  %27 = fadd float %24, %26                       ; <float> [#uses=1]
+  %28 = fmul float %9, %25                        ; <float> [#uses=1]
+  %29 = fadd float undef, %28                     ; <float> [#uses=1]
+  %30 = fmul float %8, undef                      ; <float> [#uses=1]
+  %31 = fadd float undef, %30                     ; <float> [#uses=1]
+  %32 = fmul float %6, undef                      ; <float> [#uses=1]
+  %33 = fadd float undef, %32                     ; <float> [#uses=1]
+  %34 = fmul float %9, undef                      ; <float> [#uses=1]
+  %35 = fadd float %33, %34                       ; <float> [#uses=1]
+  %36 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %37 = fmul float %7, undef                      ; <float> [#uses=1]
+  %38 = fadd float %36, %37                       ; <float> [#uses=1]
+  %39 = fmul float undef, undef                   ; <float> [#uses=1]
+  %40 = fadd float %38, %39                       ; <float> [#uses=1]
+  store float %12, float* undef, align 8
+  store float %17, float* undef, align 4
+  store float %21, float* undef, align 8
+  store float %27, float* undef, align 8
+  store float %29, float* undef, align 4
+  store float %31, float* undef, align 8
+  store float %40, float* undef, align 8
+  store float %12, float* null, align 8
+  %41 = fmul float %17, undef                     ; <float> [#uses=1]
+  %42 = fadd float %41, undef                     ; <float> [#uses=1]
+  %43 = fmul float %35, undef                     ; <float> [#uses=1]
+  %44 = fadd float %42, %43                       ; <float> [#uses=1]
+  store float %44, float* null, align 4
+  unreachable
+}
diff --git a/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
new file mode 100644
index 0000000..efc4be1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5410
+
+%0 = type { float, float, float, float }
+%pln = type { %vec, float }
+%vec = type { [4 x float] }
+
+define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
+entry:
+  br i1 undef, label %bb81, label %bb48
+
+bb48:                                             ; preds = %entry
+  %0 = call arm_aapcs_vfpcc  %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
+  ret float 0.000000e+00
+
+bb81:                                             ; preds = %entry
+  ret float 0.000000e+00
+}
+
+declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
new file mode 100644
index 0000000..6cce02d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5411
+
+%bar = type { %quad, float, float, [3 x %quux*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quuz, %quad, float, [64 x %quux], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { %quad, %quad }
+%quuz = type { [4 x %quux*], [4 x float], i32 }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quux* %a, %quux* %b, %quux* %c, i8 zeroext %forced) {
+entry:
+  br i1 undef, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
+  %1 = load float* undef, align 4                 ; <float> [#uses=1]
+  %2 = fsub float 0.000000e+00, undef             ; <float> [#uses=2]
+  %3 = fmul float 0.000000e+00, undef             ; <float> [#uses=1]
+  %4 = load float* %0, align 4                    ; <float> [#uses=3]
+  %5 = fmul float %4, %2                          ; <float> [#uses=1]
+  %6 = fsub float %3, %5                          ; <float> [#uses=1]
+  %7 = fmul float %4, undef                       ; <float> [#uses=1]
+  %8 = fsub float %7, undef                       ; <float> [#uses=1]
+  %9 = fmul float undef, %2                       ; <float> [#uses=1]
+  %10 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %11 = fsub float %9, %10                        ; <float> [#uses=1]
+  %12 = fmul float undef, %6                      ; <float> [#uses=1]
+  %13 = fmul float 0.000000e+00, %8               ; <float> [#uses=1]
+  %14 = fadd float %12, %13                       ; <float> [#uses=1]
+  %15 = fmul float %1, %11                        ; <float> [#uses=1]
+  %16 = fadd float %14, %15                       ; <float> [#uses=1]
+  %17 = select i1 undef, float undef, float %16   ; <float> [#uses=1]
+  %18 = fdiv float %17, 0.000000e+00              ; <float> [#uses=1]
+  store float %18, float* undef, align 4
+  %19 = fmul float %4, undef                      ; <float> [#uses=1]
+  store float %19, float* %0, align 4
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
new file mode 100644
index 0000000..3ff6631
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+  br i1 undef, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %bb2.i, %bb
+  %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
+  %1 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+  %3 = load float* %2, align 4                    ; <float> [#uses=1]
+  %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %5 = fsub float %3, undef                       ; <float> [#uses=2]
+  %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
+  %7 = load float* %6, align 4                    ; <float> [#uses=1]
+  %8 = fsub float %7, undef                       ; <float> [#uses=1]
+  %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
+  %10 = load float* %9, align 4                   ; <float> [#uses=1]
+  %11 = fsub float %10, undef                     ; <float> [#uses=2]
+  %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+  %13 = load float* %12, align 4                  ; <float> [#uses=1]
+  %14 = fsub float %13, undef                     ; <float> [#uses=1]
+  %15 = load float* undef, align 4                ; <float> [#uses=1]
+  %16 = fsub float %15, undef                     ; <float> [#uses=1]
+  %17 = fmul float %5, %16                        ; <float> [#uses=1]
+  %18 = fsub float %17, 0.000000e+00              ; <float> [#uses=5]
+  %19 = fmul float %8, %11                        ; <float> [#uses=1]
+  %20 = fsub float %19, undef                     ; <float> [#uses=3]
+  %21 = fmul float %1, %14                        ; <float> [#uses=1]
+  %22 = fmul float %5, %11                        ; <float> [#uses=1]
+  %23 = fsub float %21, %22                       ; <float> [#uses=2]
+  store float %18, float* undef
+  %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
+  store float %20, float* %24
+  store float %23, float* undef
+  %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
+  %26 = fmul float %18, %18                       ; <float> [#uses=1]
+  %27 = fadd float %26, undef                     ; <float> [#uses=1]
+  %28 = fadd float %27, undef                     ; <float> [#uses=1]
+  %29 = call arm_aapcs_vfpcc  float @sqrtf(float %28) readnone ; <float> [#uses=1]
+  %30 = load float* null, align 4                 ; <float> [#uses=2]
+  %31 = load float* %4, align 4                   ; <float> [#uses=2]
+  %32 = load float* %2, align 4                   ; <float> [#uses=2]
+  %33 = load float* null, align 4                 ; <float> [#uses=3]
+  %34 = load float* %6, align 4                   ; <float> [#uses=2]
+  %35 = fsub float %33, %34                       ; <float> [#uses=2]
+  %36 = fmul float %20, %35                       ; <float> [#uses=1]
+  %37 = fsub float %36, undef                     ; <float> [#uses=1]
+  %38 = fmul float %23, 0.000000e+00              ; <float> [#uses=1]
+  %39 = fmul float %18, %35                       ; <float> [#uses=1]
+  %40 = fsub float %38, %39                       ; <float> [#uses=1]
+  %41 = fmul float %18, 0.000000e+00              ; <float> [#uses=1]
+  %42 = fmul float %20, 0.000000e+00              ; <float> [#uses=1]
+  %43 = fsub float %41, %42                       ; <float> [#uses=1]
+  %44 = fmul float 0.000000e+00, %37              ; <float> [#uses=1]
+  %45 = fmul float %31, %40                       ; <float> [#uses=1]
+  %46 = fadd float %44, %45                       ; <float> [#uses=1]
+  %47 = fmul float %33, %43                       ; <float> [#uses=1]
+  %48 = fadd float %46, %47                       ; <float> [#uses=2]
+  %49 = load float* %9, align 4                   ; <float> [#uses=2]
+  %50 = fsub float %30, %49                       ; <float> [#uses=1]
+  %51 = load float* %12, align 4                  ; <float> [#uses=3]
+  %52 = fsub float %32, %51                       ; <float> [#uses=2]
+  %53 = load float* undef, align 4                ; <float> [#uses=2]
+  %54 = load float* %24, align 4                  ; <float> [#uses=2]
+  %55 = fmul float %54, undef                     ; <float> [#uses=1]
+  %56 = fmul float undef, %52                     ; <float> [#uses=1]
+  %57 = fsub float %55, %56                       ; <float> [#uses=1]
+  %58 = fmul float undef, %52                     ; <float> [#uses=1]
+  %59 = fmul float %54, %50                       ; <float> [#uses=1]
+  %60 = fsub float %58, %59                       ; <float> [#uses=1]
+  %61 = fmul float %30, %57                       ; <float> [#uses=1]
+  %62 = fmul float %32, 0.000000e+00              ; <float> [#uses=1]
+  %63 = fadd float %61, %62                       ; <float> [#uses=1]
+  %64 = fmul float %34, %60                       ; <float> [#uses=1]
+  %65 = fadd float %63, %64                       ; <float> [#uses=2]
+  %66 = fcmp olt float %48, %65                   ; <i1> [#uses=1]
+  %67 = fsub float %49, 0.000000e+00              ; <float> [#uses=1]
+  %68 = fsub float %51, %31                       ; <float> [#uses=1]
+  %69 = fsub float %53, %33                       ; <float> [#uses=1]
+  %70 = fmul float undef, %67                     ; <float> [#uses=1]
+  %71 = load float* undef, align 4                ; <float> [#uses=2]
+  %72 = fmul float %71, %69                       ; <float> [#uses=1]
+  %73 = fsub float %70, %72                       ; <float> [#uses=1]
+  %74 = fmul float %71, %68                       ; <float> [#uses=1]
+  %75 = fsub float %74, 0.000000e+00              ; <float> [#uses=1]
+  %76 = fmul float %51, %73                       ; <float> [#uses=1]
+  %77 = fadd float undef, %76                     ; <float> [#uses=1]
+  %78 = fmul float %53, %75                       ; <float> [#uses=1]
+  %79 = fadd float %77, %78                       ; <float> [#uses=1]
+  %80 = select i1 %66, float %48, float %65       ; <float> [#uses=1]
+  %81 = select i1 undef, float %80, float %79     ; <float> [#uses=1]
+  %iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1]
+  %82 = fdiv float %81, %iftmp.164.0              ; <float> [#uses=1]
+  %iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1]
+  store float %iftmp.165.0, float* undef, align 4
+  br i1 false, label %bb4.i97, label %ccc.exit98
+
+bb4.i97:                                          ; preds = %bb3.i
+  br label %ccc.exit98
+
+ccc.exit98:                                       ; preds = %bb4.i97, %bb3.i
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
new file mode 100644
index 0000000..832ff4f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+; rdar://7384107
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+  %0 = load %bar** undef, align 4                 ; <%bar*> [#uses=2]
+  br i1 false, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %bb2.i, %bb
+  %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+  %2 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
+  %5 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+  %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %8 = fsub float undef, undef                    ; <float> [#uses=1]
+  %9 = fmul float 0.000000e+00, %8                ; <float> [#uses=1]
+  %10 = fmul float %5, 0.000000e+00               ; <float> [#uses=1]
+  %11 = fsub float %9, %10                        ; <float> [#uses=3]
+  %12 = fmul float %2, 0.000000e+00               ; <float> [#uses=1]
+  %13 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %14 = fsub float %12, %13                       ; <float> [#uses=2]
+  store float %14, float* undef
+  %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
+  store float 0.000000e+00, float* %15
+  %16 = fmul float %11, %11                       ; <float> [#uses=1]
+  %17 = fadd float %16, 0.000000e+00              ; <float> [#uses=1]
+  %18 = fadd float %17, undef                     ; <float> [#uses=1]
+  %19 = call arm_aapcs_vfpcc  float @sqrtf(float %18) readnone ; <float> [#uses=2]
+  %20 = fcmp ogt float %19, 0x3F1A36E2E0000000    ; <i1> [#uses=1]
+  %21 = load float* %1, align 4                   ; <float> [#uses=2]
+  %22 = load float* %3, align 4                   ; <float> [#uses=2]
+  %23 = load float* undef, align 4                ; <float> [#uses=2]
+  %24 = load float* %4, align 4                   ; <float> [#uses=2]
+  %25 = fsub float %23, %24                       ; <float> [#uses=2]
+  %26 = fmul float 0.000000e+00, %25              ; <float> [#uses=1]
+  %27 = fsub float %26, undef                     ; <float> [#uses=1]
+  %28 = fmul float %14, 0.000000e+00              ; <float> [#uses=1]
+  %29 = fmul float %11, %25                       ; <float> [#uses=1]
+  %30 = fsub float %28, %29                       ; <float> [#uses=1]
+  %31 = fsub float undef, 0.000000e+00            ; <float> [#uses=1]
+  %32 = fmul float %21, %27                       ; <float> [#uses=1]
+  %33 = fmul float undef, %30                     ; <float> [#uses=1]
+  %34 = fadd float %32, %33                       ; <float> [#uses=1]
+  %35 = fmul float %23, %31                       ; <float> [#uses=1]
+  %36 = fadd float %34, %35                       ; <float> [#uses=1]
+  %37 = load float* %6, align 4                   ; <float> [#uses=2]
+  %38 = load float* %7, align 4                   ; <float> [#uses=2]
+  %39 = fsub float %22, %38                       ; <float> [#uses=2]
+  %40 = load float* undef, align 4                ; <float> [#uses=1]
+  %41 = load float* null, align 4                 ; <float> [#uses=2]
+  %42 = fmul float %41, undef                     ; <float> [#uses=1]
+  %43 = fmul float undef, %39                     ; <float> [#uses=1]
+  %44 = fsub float %42, %43                       ; <float> [#uses=1]
+  %45 = fmul float undef, %39                     ; <float> [#uses=1]
+  %46 = fmul float %41, 0.000000e+00              ; <float> [#uses=1]
+  %47 = fsub float %45, %46                       ; <float> [#uses=1]
+  %48 = fmul float 0.000000e+00, %44              ; <float> [#uses=1]
+  %49 = fmul float %22, undef                     ; <float> [#uses=1]
+  %50 = fadd float %48, %49                       ; <float> [#uses=1]
+  %51 = fmul float %24, %47                       ; <float> [#uses=1]
+  %52 = fadd float %50, %51                       ; <float> [#uses=1]
+  %53 = fsub float %37, %21                       ; <float> [#uses=2]
+  %54 = fmul float undef, undef                   ; <float> [#uses=1]
+  %55 = fmul float undef, undef                   ; <float> [#uses=1]
+  %56 = fsub float %54, %55                       ; <float> [#uses=1]
+  %57 = fmul float undef, %53                     ; <float> [#uses=1]
+  %58 = load float* undef, align 4                ; <float> [#uses=2]
+  %59 = fmul float %58, undef                     ; <float> [#uses=1]
+  %60 = fsub float %57, %59                       ; <float> [#uses=1]
+  %61 = fmul float %58, undef                     ; <float> [#uses=1]
+  %62 = fmul float undef, %53                     ; <float> [#uses=1]
+  %63 = fsub float %61, %62                       ; <float> [#uses=1]
+  %64 = fmul float %37, %56                       ; <float> [#uses=1]
+  %65 = fmul float %38, %60                       ; <float> [#uses=1]
+  %66 = fadd float %64, %65                       ; <float> [#uses=1]
+  %67 = fmul float %40, %63                       ; <float> [#uses=1]
+  %68 = fadd float %66, %67                       ; <float> [#uses=1]
+  %69 = select i1 undef, float %36, float %52     ; <float> [#uses=1]
+  %70 = select i1 undef, float %69, float %68     ; <float> [#uses=1]
+  %iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1]
+  %71 = fdiv float %70, %iftmp.164.0              ; <float> [#uses=1]
+  store float %71, float* null, align 4
+  %72 = icmp eq %bar* null, %0                    ; <i1> [#uses=1]
+  br i1 %72, label %bb4.i97, label %ccc.exit98
+
+bb4.i97:                                          ; preds = %bb3.i
+  %73 = load %bar** undef, align 4                ; <%bar*> [#uses=0]
+  br label %ccc.exit98
+
+ccc.exit98:                                       ; preds = %bb4.i97, %bb3.i
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index d8019a0..062133e 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s
 
 define float @f(float %z, double %a, float %b) {
-; CHECK: fcpys s0, s1
+; CHECK: vmov.f32 s0, s1
         %tmp = call float @g(float %b)
         ret float %tmp
 }
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index 5f3ed1d..fac2bc5 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep fcmpes
+; RUN:   grep vcmpe.f32
 
 define void @test3(float* %glob, i32 %X) {
 entry:
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 5690a01..46f136b 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index a01f868..1426a2d 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index bf7c305..a6d7410 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm | grep bic | count 2
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep fneg | count 2
+; RUN:   grep vneg | count 2
 
 define float @test1(float %x, double %y) {
 	%tmp = fpext float %x to double
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 2af250d..45803f6 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index ebf1d84..6db2385 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64
 
 define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index 5c31ea6..57efa82 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index c6e6d40..31b5c52 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index cb5dade..735263c 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 7da443d..bc3d42d 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 
 define float @test1(float* %a) {
 entry:
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 8fc13e7..724947e 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -4,14 +4,14 @@
 
 define float @test(float %acc, float %a, float %b) {
 entry:
-; VFP2: fnmacs
-; NEON: fnmacs
+; VFP2: vmls.f32
+; NEON: vmls.f32
 
 ; NEONFP-NOT: vmls
-; NEONFP-NOT: fcpys
+; NEONFP-NOT: vmov.f32
 ; NEONFP:     vmul.f32
 ; NEONFP:     vsub.f32
-; NEONFP:     fmrs
+; NEONFP:     vmov
 
 	%0 = fmul float %a, %b
         %1 = fsub float %acc, %0
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 3ae437d..ad21882 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
 
 define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0 
+; CHECK: vnmla.f32 s2, s1, s0
 entry:
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
@@ -14,7 +14,7 @@ entry:
 }
 
 define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0 
+; CHECK: vnmla.f32 s2, s1, s0
 entry:
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 613b347..6d7bc05 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64
 
 
 define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 4e4ef72..8fbd45b 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -2,9 +2,9 @@
 
 define float @f(i32 %a) {
 ;CHECK: f:
-;CHECK: fmsr
-;CHECK-NEXT: fsitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.s32
+;CHECK-NEXT: vmov
 entry:
         %tmp = sitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
@@ -12,9 +12,9 @@ entry:
 
 define double @g(i32 %a) {
 ;CHECK: g:
-;CHECK: fmsr
-;CHECK-NEXT: fsitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.s32
+;CHECK-NEXT: vmov
 entry:
         %tmp = sitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
@@ -22,9 +22,9 @@ entry:
 
 define double @uint_to_double(i32 %a) {
 ;CHECK: uint_to_double:
-;CHECK: fmsr
-;CHECK-NEXT: fuitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.u32
+;CHECK-NEXT: vmov
 entry:
         %tmp = uitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
@@ -32,9 +32,9 @@ entry:
 
 define float @uint_to_float(i32 %a) {
 ;CHECK: uint_to_float:
-;CHECK: fmsr
-;CHECK-NEXT: fuitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.u32
+;CHECK-NEXT: vmov
 entry:
         %tmp = uitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
@@ -42,8 +42,8 @@ entry:
 
 define double @h(double* %v) {
 ;CHECK: h:
-;CHECK: fldd
-;CHECK-NEXT: fmrrd
+;CHECK: vldr.64 
+;CHECK-NEXT: vmov
 entry:
         %tmp = load double* %v          ; <double> [#uses=1]
         ret double %tmp
@@ -58,13 +58,13 @@ entry:
 
 define double @f2(double %a) {
 ;CHECK: f2:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
         ret double %a
 }
 
 define void @f3() {
 ;CHECK: f3:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
 ;CHECK: f4
 entry:
         %tmp = call double @f5( )               ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 9ce2ac5..2adac78 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -6,7 +6,7 @@
 
 define i32 @test1(float %a, float %b) {
 ; VFP2: test1:
-; VFP2: ftosizs s0, s0
+; VFP2: vcvt.s32.f32 s0, s0
 ; NEON: test1:
 ; NEON: vcvt.s32.f32 d0, d0
 entry:
@@ -17,7 +17,7 @@ entry:
 
 define i32 @test2(float %a, float %b) {
 ; VFP2: test2:
-; VFP2: ftouizs s0, s0
+; VFP2: vcvt.u32.f32 s0, s0
 ; NEON: test2:
 ; NEON: vcvt.u32.f32 d0, d0
 entry:
@@ -28,7 +28,7 @@ entry:
 
 define float @test3(i32 %a, i32 %b) {
 ; VFP2: test3:
-; VFP2: fuitos s0, s0
+; VFP2: vcvt.f32.u32 s0, s0
 ; NEON: test3:
 ; NEON: vcvt.f32.u32 d0, d0
 entry:
@@ -39,7 +39,7 @@ entry:
 
 define float @test4(i32 %a, i32 %b) {
 ; VFP2: test4:
-; VFP2: fsitos s0, s0
+; VFP2: vcvt.f32.s32 s0, s0
 ; NEON: test4:
 ; NEON: vcvt.f32.s32 d0, d0
 entry:
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index ebeeb18..ce6d6b2 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -2,7 +2,7 @@
 
 define float @f1(float %a, float %b) {
 ;CHECK: f1:
-;CHECK: fadds
+;CHECK: vadd.f32
 entry:
 	%tmp = fadd float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -10,7 +10,7 @@ entry:
 
 define double @f2(double %a, double %b) {
 ;CHECK: f2:
-;CHECK: faddd
+;CHECK: vadd.f64
 entry:
 	%tmp = fadd double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -18,7 +18,7 @@ entry:
 
 define float @f3(float %a, float %b) {
 ;CHECK: f3:
-;CHECK: fmuls
+;CHECK: vmul.f32
 entry:
 	%tmp = fmul float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -26,7 +26,7 @@ entry:
 
 define double @f4(double %a, double %b) {
 ;CHECK: f4:
-;CHECK: fmuld
+;CHECK: vmul.f64
 entry:
 	%tmp = fmul double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -34,7 +34,7 @@ entry:
 
 define float @f5(float %a, float %b) {
 ;CHECK: f5:
-;CHECK: fsubs
+;CHECK: vsub.f32
 entry:
 	%tmp = fsub float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -42,7 +42,7 @@ entry:
 
 define double @f6(double %a, double %b) {
 ;CHECK: f6:
-;CHECK: fsubd
+;CHECK: vsub.f64
 entry:
 	%tmp = fsub double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -58,7 +58,7 @@ entry:
 
 define double @f8(double %a) {
 ;CHECK: f8:
-;CHECK: fnegd
+;CHECK: vneg.f64
 entry:
 	%tmp1 = fsub double -0.000000e+00, %a		; <double> [#uses=1]
 	ret double %tmp1
@@ -66,7 +66,7 @@ entry:
 
 define float @f9(float %a, float %b) {
 ;CHECK: f9:
-;CHECK: fdivs
+;CHECK: vdiv.f32
 entry:
 	%tmp1 = fdiv float %a, %b		; <float> [#uses=1]
 	ret float %tmp1
@@ -74,7 +74,7 @@ entry:
 
 define double @f10(double %a, double %b) {
 ;CHECK: f10:
-;CHECK: fdivd
+;CHECK: vdiv.f64
 entry:
 	%tmp1 = fdiv double %a, %b		; <double> [#uses=1]
 	ret double %tmp1
@@ -92,7 +92,7 @@ declare float @fabsf(float)
 
 define double @f12(double %a) {
 ;CHECK: f12:
-;CHECK: fabsd
+;CHECK: vabs.f64
 entry:
 	%tmp1 = call double @fabs( double %a )		; <double> [#uses=1]
 	ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index 2c9591c..260ec49 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(float %a) {
 ;CHECK: f1:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movmi
 entry:
         %tmp = fcmp olt float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -12,7 +12,7 @@ entry:
 
 define i32 @f2(float %a) {
 ;CHECK: f2:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: moveq
 entry:
         %tmp = fcmp oeq float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -22,7 +22,7 @@ entry:
 
 define i32 @f3(float %a) {
 ;CHECK: f3:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movgt
 entry:
         %tmp = fcmp ogt float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -32,7 +32,7 @@ entry:
 
 define i32 @f4(float %a) {
 ;CHECK: f4:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movge
 entry:
         %tmp = fcmp oge float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
 
 define i32 @f5(float %a) {
 ;CHECK: f5:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movls
 entry:
         %tmp = fcmp ole float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -52,7 +52,7 @@ entry:
 
 define i32 @f6(float %a) {
 ;CHECK: f6:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movne
 entry:
         %tmp = fcmp une float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -62,7 +62,7 @@ entry:
 
 define i32 @g1(double %a) {
 ;CHECK: g1:
-;CHECK: fcmped
+;CHECK: vcmpe.f64
 ;CHECK: movmi
 entry:
         %tmp = fcmp olt double %a, 1.000000e+00         ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index ee3c338..bf197a4 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -3,7 +3,7 @@
 
 define float @f1(double %x) {
 ;CHECK-VFP: f1:
-;CHECK-VFP: fcvtsd
+;CHECK-VFP: vcvt.f32.f64
 ;CHECK: f1:
 ;CHECK: truncdfsf2
 entry:
@@ -13,7 +13,7 @@ entry:
 
 define double @f2(float %x) {
 ;CHECK-VFP: f2:
-;CHECK-VFP: fcvtds
+;CHECK-VFP: vcvt.f64.f32
 ;CHECK: f2:
 ;CHECK: extendsfdf2
 entry:
@@ -23,7 +23,7 @@ entry:
 
 define i32 @f3(float %x) {
 ;CHECK-VFP: f3:
-;CHECK-VFP: ftosizs
+;CHECK-VFP: vcvt.s32.f32
 ;CHECK: f3:
 ;CHECK: fixsfsi
 entry:
@@ -33,7 +33,7 @@ entry:
 
 define i32 @f4(float %x) {
 ;CHECK-VFP: f4:
-;CHECK-VFP: ftouizs
+;CHECK-VFP: vcvt.u32.f32
 ;CHECK: f4:
 ;CHECK: fixunssfsi
 entry:
@@ -43,7 +43,7 @@ entry:
 
 define i32 @f5(double %x) {
 ;CHECK-VFP: f5:
-;CHECK-VFP: ftosizd
+;CHECK-VFP: vcvt.s32.f64
 ;CHECK: f5:
 ;CHECK: fixdfsi
 entry:
@@ -53,7 +53,7 @@ entry:
 
 define i32 @f6(double %x) {
 ;CHECK-VFP: f6:
-;CHECK-VFP: ftouizd
+;CHECK-VFP: vcvt.u32.f64
 ;CHECK: f6:
 ;CHECK: fixunsdfsi
 entry:
@@ -63,7 +63,7 @@ entry:
 
 define float @f7(i32 %a) {
 ;CHECK-VFP: f7:
-;CHECK-VFP: fsitos
+;CHECK-VFP: vcvt.f32.s32
 ;CHECK: f7:
 ;CHECK: floatsisf
 entry:
@@ -73,7 +73,7 @@ entry:
 
 define double @f8(i32 %a) {
 ;CHECK-VFP: f8:
-;CHECK-VFP: fsitod
+;CHECK-VFP: vcvt.f64.s32
 ;CHECK: f8:
 ;CHECK: floatsidf
 entry:
@@ -83,7 +83,7 @@ entry:
 
 define float @f9(i32 %a) {
 ;CHECK-VFP: f9:
-;CHECK-VFP: fuitos
+;CHECK-VFP: vcvt.f32.u32
 ;CHECK: f9:
 ;CHECK: floatunsisf
 entry:
@@ -93,7 +93,7 @@ entry:
 
 define double @f10(i32 %a) {
 ;CHECK-VFP: f10:
-;CHECK-VFP: fuitod
+;CHECK-VFP: vcvt.f64.u32
 ;CHECK: f10:
 ;CHECK: floatunsidf
 entry:
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 0822fbf..c3cff18 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
 
 define float @f2(float* %v, float %u) {
 ; CHECK: f2:
-; CHECK: flds{{.*}}[
+; CHECK: vldr.32{{.*}}[
         %tmp = load float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
 
 define void @f3(float %a, float %b, float* %v) {
 ; CHECK: f3:
-; CHECK: fsts{{.*}}[
+; CHECK: vstr.32{{.*}}[
         %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
         ret void
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 0d270b0..4cacc5d 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep -E {vmov\\W*r\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
 
 @i = weak global i32 0		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index 060dd46..f84ccdd 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 8ed58bd..886c0d5 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -40,14 +40,14 @@ define i32 @test1() {
 
 ; DarwinPIC: _test1:
 ; DarwinPIC: 	ldr r0, LCPI1_0
-; DarwinPIC: LPC0:
+; DarwinPIC: LPC1_0:
 ; DarwinPIC:    ldr r0, [pc, +r0]
 ; DarwinPIC:    ldr r0, [r0]
 ; DarwinPIC:    bx lr
 
 ; DarwinPIC: 	.align	2
 ; DarwinPIC: LCPI1_0:
-; DarwinPIC: 	.long	L_G$non_lazy_ptr-(LPC0+8)
+; DarwinPIC: 	.long	L_G$non_lazy_ptr-(LPC1_0+8)
 
 ; DarwinPIC: 	.section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
 ; DarwinPIC:	.align	2
@@ -61,7 +61,7 @@ define i32 @test1() {
 ; LinuxPIC: 	ldr r0, .LCPI1_0
 ; LinuxPIC: 	ldr r1, .LCPI1_1
 	
-; LinuxPIC: .LPC0:
+; LinuxPIC: .LPC1_0:
 ; LinuxPIC: 	add r0, pc, r0
 ; LinuxPIC: 	ldr r0, [r1, +r0]
 ; LinuxPIC: 	ldr r0, [r0]
@@ -69,7 +69,7 @@ define i32 @test1() {
 
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI1_0:
-; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC0+8)
+; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC1_0+8)
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI1_1:
 ; LinuxPIC:     .long	G(GOT)
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index e9145ac..623f2cb 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -11,7 +11,7 @@ entry:
 
 define void @t1(i32 %a, i32 %b) {
 ; CHECK: t1:
-; CHECK: ldmltfd sp!, {r7, pc}
+; CHECK: ldmfdlt sp!, {r7, pc}
 entry:
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 5824115..d7fcf7d 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmhi | count 1
+; RUN:   grep ldmfdhi | count 1
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index f9cf88f..c60ad93 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep moveq | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmeq | count 1
+; RUN:   grep ldmfdeq | count 1
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 6cb8e7b..a7da834 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmne | count 1
+; RUN:   grep ldmfdne | count 1
 
 	%struct.SString = type { i8*, i32, i32 }
 
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index cf4a1ab..8b56f13 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -55,6 +55,6 @@ L1:                                               ; preds = %L2, %bb2
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
 }
-; ARM: .long L_foo_L5-(LPC{{.*}}+8)
-; THUMB: .long L_foo_L5-(LPC{{.*}}+4)
-; THUMB2: .long L_foo_L5
+; ARM: .long LBA4__foo__L5-(LPC{{.*}}+8)
+; THUMB: .long LBA4__foo__L5-(LPC{{.*}}+4)
+; THUMB2: .long LBA4__foo__L5
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 2796dec..c78872a 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
 
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 547bab7..130277b 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
 ; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd  | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov  | count 2
 
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 85c8b5b..29c55c6 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -60,7 +60,7 @@ define double @f7(double %a, double %b) {
 ;CHECK: movlt
 ;CHECK: movlt
 ;CHECK-VFP: f7:
-;CHECK-VFP: fcpydmi
+;CHECK-VFP: vmovmi
     %tmp = fcmp olt double %a, 1.234e+00
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index f4b27a7..5ad7ecc 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
 
 define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: vstmia sp
-; CHECK: vldmia sp
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
 entry:
   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 50000e31..44a44af 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -15,11 +15,11 @@ declare double @fabs(double)
 define void @test_abs(float* %P, double* %D) {
 ;CHECK: test_abs:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: fabss
+;CHECK: vabs.f32
 	%b = call float @fabsf( float %a )		; <float> [#uses=1]
 	store float %b, float* %P
 	%A = load double* %D		; <double> [#uses=1]
-;CHECK: fabsd
+;CHECK: vabs.f64
 	%B = call double @fabs( double %A )		; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
@@ -39,10 +39,10 @@ define void @test_add(float* %P, double* %D) {
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: fcvtds
+;CHECK: vcvt.f64.f32
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
-;CHECK: fcvtsd
+;CHECK: vcvt.f32.f64
 	%B = fptrunc double %A to float		; <float> [#uses=1]
 	store double %b, double* %D
 	store float %B, float* %P
@@ -54,7 +54,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 	%a1 = load float* %P1		; <float> [#uses=1]
 	%a2 = load float* %P2		; <float> [#uses=1]
 	%a3 = load float* %P3		; <float> [#uses=1]
-;CHECK: fmscs
+;CHECK: vnmls.f32
 	%X = fmul float %a1, %a2		; <float> [#uses=1]
 	%Y = fsub float %X, %a3		; <float> [#uses=1]
 	store float %Y, float* %P1
@@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 define i32 @test_ftoi(float* %P1) {
 ;CHECK: test_ftoi:
 	%a1 = load float* %P1		; <float> [#uses=1]
-;CHECK: ftosizs
+;CHECK: vcvt.s32.f32
 	%b1 = fptosi float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) {
 define i32 @test_ftou(float* %P1) {
 ;CHECK: test_ftou:
 	%a1 = load float* %P1		; <float> [#uses=1]
-;CHECK: ftouizs
+;CHECK: vcvt.u32.f32
 	%b1 = fptoui float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) {
 define i32 @test_dtoi(double* %P1) {
 ;CHECK: test_dtoi:
 	%a1 = load double* %P1		; <double> [#uses=1]
-;CHECK: ftosizd
+;CHECK: vcvt.s32.f64
 	%b1 = fptosi double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -88,14 +88,14 @@ define i32 @test_dtoi(double* %P1) {
 define i32 @test_dtou(double* %P1) {
 ;CHECK: test_dtou:
 	%a1 = load double* %P1		; <double> [#uses=1]
-;CHECK: ftouizd
+;CHECK: vcvt.u32.f64
 	%b1 = fptoui double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define void @test_utod(double* %P1, i32 %X) {
 ;CHECK: test_utod:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
 	%b1 = uitofp i32 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
@@ -103,7 +103,7 @@ define void @test_utod(double* %P1, i32 %X) {
 
 define void @test_utod2(double* %P1, i8 %X) {
 ;CHECK: test_utod2:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
 	%b1 = uitofp i8 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
@@ -141,7 +141,7 @@ define void @test_cmpfp0(float* %glob, i32 %X) {
 ;CHECK: test_cmpfp0:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=1]
-;CHECK: fcmpezs
+;CHECK: vcmpe.f32
 	%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
 
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index f0df798..5dd87d6 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -204,8 +204,8 @@ define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
 
 define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
 ;CHECK: test_vset_lanef32:
-;CHECK: fcpys
-;CHECK: fcpys
+;CHECK: vmov.f32
+;CHECK: vmov.f32
 entry:
   %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
   ret <2 x float> %0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index ed69f97..e4368d6 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -134,6 +134,26 @@ define <2 x i64> @v_movQi64() nounwind {
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
+; Check for correct assembler printing for immediate values.
+%struct.int8x8_t = type { <8 x i8> }
+define arm_apcscc void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupn128:
+;CHECK: vmov.i8 d0, #0x80
+  %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+  store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
+  ret void
+}
+
+define arm_apcscc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupnneg75:
+;CHECK: vmov.i8 d0, #0xB5
+  %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+  store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
+  ret void
+}
+
 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
 ;CHECK: vmovls8:
 ;CHECK: vmovl.s8