117 files changed, 2805 insertions, 223 deletions
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index ee93fde..2b7ccd8 100644
--- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13
 
 	%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
 	%struct.VEC2 = type { double, double, double }
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 98cab9a..3909c6a 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
   br i1 %4, label %bb1, label %bb2
 
 bb1:
-;CHECK: fstdhi
+;CHECK: vstrhi.64
   store double %1, double* %y, align 4
   br label %bb2
 
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 6281775..5476d5f 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
   %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-; CHECK: fldd
+; CHECK: vldr.64
   %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
index c260b97..62f3786 100644
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
@@ -11,12 +11,15 @@ entry:
   %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
   store <4 x float> %quat.0, <4 x float>* %0
   %1 = call arm_aapcs_vfpcc  <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
+;CHECK: vmov.f32
+;CHECK: vmov.f32
   %2 = fmul <4 x float> %1, %1                    ; <<4 x float>> [#uses=2]
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
   %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
   %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
   %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
   %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
+;CHECK: vmov
   %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
   %9 = fmul <4 x float> %8, %8                    ; <<4 x float>> [#uses=1]
   %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
new file mode 100644
index 0000000..dd2845f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; PR5423
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo() {
+entry:
+  %0 = load float* null, align 4                  ; <float> [#uses=2]
+  %1 = fmul float %0, undef                       ; <float> [#uses=2]
+  %2 = fmul float 0.000000e+00, %1                ; <float> [#uses=2]
+  %3 = fmul float %0, %1                          ; <float> [#uses=1]
+  %4 = fadd float 0.000000e+00, %3                ; <float> [#uses=1]
+  %5 = fsub float 1.000000e+00, %4                ; <float> [#uses=1]
+; CHECK: foo:
+; CHECK: fconsts s{{[0-9]+}}, #112
+  %6 = fsub float 1.000000e+00, undef             ; <float> [#uses=2]
+  %7 = fsub float %2, undef                       ; <float> [#uses=1]
+  %8 = fsub float 0.000000e+00, undef             ; <float> [#uses=3]
+  %9 = fadd float %2, undef                       ; <float> [#uses=3]
+  %10 = load float* undef, align 8                ; <float> [#uses=3]
+  %11 = fmul float %8, %10                        ; <float> [#uses=1]
+  %12 = fadd float undef, %11                     ; <float> [#uses=2]
+  %13 = fmul float undef, undef                   ; <float> [#uses=1]
+  %14 = fmul float %6, 0.000000e+00               ; <float> [#uses=1]
+  %15 = fadd float %13, %14                       ; <float> [#uses=1]
+  %16 = fmul float %9, %10                        ; <float> [#uses=1]
+  %17 = fadd float %15, %16                       ; <float> [#uses=2]
+  %18 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %19 = fadd float %18, 0.000000e+00              ; <float> [#uses=1]
+  %20 = fmul float undef, %10                     ; <float> [#uses=1]
+  %21 = fadd float %19, %20                       ; <float> [#uses=1]
+  %22 = load float* undef, align 8                ; <float> [#uses=1]
+  %23 = fmul float %5, %22                        ; <float> [#uses=1]
+  %24 = fadd float %23, undef                     ; <float> [#uses=1]
+  %25 = load float* undef, align 8                ; <float> [#uses=2]
+  %26 = fmul float %8, %25                        ; <float> [#uses=1]
+  %27 = fadd float %24, %26                       ; <float> [#uses=1]
+  %28 = fmul float %9, %25                        ; <float> [#uses=1]
+  %29 = fadd float undef, %28                     ; <float> [#uses=1]
+  %30 = fmul float %8, undef                      ; <float> [#uses=1]
+  %31 = fadd float undef, %30                     ; <float> [#uses=1]
+  %32 = fmul float %6, undef                      ; <float> [#uses=1]
+  %33 = fadd float undef, %32                     ; <float> [#uses=1]
+  %34 = fmul float %9, undef                      ; <float> [#uses=1]
+  %35 = fadd float %33, %34                       ; <float> [#uses=1]
+  %36 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %37 = fmul float %7, undef                      ; <float> [#uses=1]
+  %38 = fadd float %36, %37                       ; <float> [#uses=1]
+  %39 = fmul float undef, undef                   ; <float> [#uses=1]
+  %40 = fadd float %38, %39                       ; <float> [#uses=1]
+  store float %12, float* undef, align 8
+  store float %17, float* undef, align 4
+  store float %21, float* undef, align 8
+  store float %27, float* undef, align 8
+  store float %29, float* undef, align 4
+  store float %31, float* undef, align 8
+  store float %40, float* undef, align 8
+  store float %12, float* null, align 8
+  %41 = fmul float %17, undef                     ; <float> [#uses=1]
+  %42 = fadd float %41, undef                     ; <float> [#uses=1]
+  %43 = fmul float %35, undef                     ; <float> [#uses=1]
+  %44 = fadd float %42, %43                       ; <float> [#uses=1]
+  store float %44, float* null, align 4
+  unreachable
+}
diff --git a/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
new file mode 100644
index 0000000..efc4be1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5410
+
+%0 = type { float, float, float, float }
+%pln = type { %vec, float }
+%vec = type { [4 x float] }
+
+define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
+entry:
+  br i1 undef, label %bb81, label %bb48
+
+bb48:                                             ; preds = %entry
+  %0 = call arm_aapcs_vfpcc  %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
+  ret float 0.000000e+00
+
+bb81:                                             ; preds = %entry
+  ret float 0.000000e+00
+}
+
+declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
new file mode 100644
index 0000000..6cce02d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5411
+
+%bar = type { %quad, float, float, [3 x %quux*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quuz, %quad, float, [64 x %quux], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { %quad, %quad }
+%quuz = type { [4 x %quux*], [4 x float], i32 }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quux* %a, %quux* %b, %quux* %c, i8 zeroext %forced) {
+entry:
+  br i1 undef, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
+  %1 = load float* undef, align 4                 ; <float> [#uses=1]
+  %2 = fsub float 0.000000e+00, undef             ; <float> [#uses=2]
+  %3 = fmul float 0.000000e+00, undef             ; <float> [#uses=1]
+  %4 = load float* %0, align 4                    ; <float> [#uses=3]
+  %5 = fmul float %4, %2                          ; <float> [#uses=1]
+  %6 = fsub float %3, %5                          ; <float> [#uses=1]
+  %7 = fmul float %4, undef                       ; <float> [#uses=1]
+  %8 = fsub float %7, undef                       ; <float> [#uses=1]
+  %9 = fmul float undef, %2                       ; <float> [#uses=1]
+  %10 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %11 = fsub float %9, %10                        ; <float> [#uses=1]
+  %12 = fmul float undef, %6                      ; <float> [#uses=1]
+  %13 = fmul float 0.000000e+00, %8               ; <float> [#uses=1]
+  %14 = fadd float %12, %13                       ; <float> [#uses=1]
+  %15 = fmul float %1, %11                        ; <float> [#uses=1]
+  %16 = fadd float %14, %15                       ; <float> [#uses=1]
+  %17 = select i1 undef, float undef, float %16   ; <float> [#uses=1]
+  %18 = fdiv float %17, 0.000000e+00              ; <float> [#uses=1]
+  store float %18, float* undef, align 4
+  %19 = fmul float %4, undef                      ; <float> [#uses=1]
+  store float %19, float* %0, align 4
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
new file mode 100644
index 0000000..3ff6631
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+  br i1 undef, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %bb2.i, %bb
+  %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
+  %1 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+  %3 = load float* %2, align 4                    ; <float> [#uses=1]
+  %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %5 = fsub float %3, undef                       ; <float> [#uses=2]
+  %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
+  %7 = load float* %6, align 4                    ; <float> [#uses=1]
+  %8 = fsub float %7, undef                       ; <float> [#uses=1]
+  %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
+  %10 = load float* %9, align 4                   ; <float> [#uses=1]
+  %11 = fsub float %10, undef                     ; <float> [#uses=2]
+  %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+  %13 = load float* %12, align 4                  ; <float> [#uses=1]
+  %14 = fsub float %13, undef                     ; <float> [#uses=1]
+  %15 = load float* undef, align 4                ; <float> [#uses=1]
+  %16 = fsub float %15, undef                     ; <float> [#uses=1]
+  %17 = fmul float %5, %16                        ; <float> [#uses=1]
+  %18 = fsub float %17, 0.000000e+00              ; <float> [#uses=5]
+  %19 = fmul float %8, %11                        ; <float> [#uses=1]
+  %20 = fsub float %19, undef                     ; <float> [#uses=3]
+  %21 = fmul float %1, %14                        ; <float> [#uses=1]
+  %22 = fmul float %5, %11                        ; <float> [#uses=1]
+  %23 = fsub float %21, %22                       ; <float> [#uses=2]
+  store float %18, float* undef
+  %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
+  store float %20, float* %24
+  store float %23, float* undef
+  %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
+  %26 = fmul float %18, %18                       ; <float> [#uses=1]
+  %27 = fadd float %26, undef                     ; <float> [#uses=1]
+  %28 = fadd float %27, undef                     ; <float> [#uses=1]
+  %29 = call arm_aapcs_vfpcc  float @sqrtf(float %28) readnone ; <float> [#uses=1]
+  %30 = load float* null, align 4                 ; <float> [#uses=2]
+  %31 = load float* %4, align 4                   ; <float> [#uses=2]
+  %32 = load float* %2, align 4                   ; <float> [#uses=2]
+  %33 = load float* null, align 4                 ; <float> [#uses=3]
+  %34 = load float* %6, align 4                   ; <float> [#uses=2]
+  %35 = fsub float %33, %34                       ; <float> [#uses=2]
+  %36 = fmul float %20, %35                       ; <float> [#uses=1]
+  %37 = fsub float %36, undef                     ; <float> [#uses=1]
+  %38 = fmul float %23, 0.000000e+00              ; <float> [#uses=1]
+  %39 = fmul float %18, %35                       ; <float> [#uses=1]
+  %40 = fsub float %38, %39                       ; <float> [#uses=1]
+  %41 = fmul float %18, 0.000000e+00              ; <float> [#uses=1]
+  %42 = fmul float %20, 0.000000e+00              ; <float> [#uses=1]
+  %43 = fsub float %41, %42                       ; <float> [#uses=1]
+  %44 = fmul float 0.000000e+00, %37              ; <float> [#uses=1]
+  %45 = fmul float %31, %40                       ; <float> [#uses=1]
+  %46 = fadd float %44, %45                       ; <float> [#uses=1]
+  %47 = fmul float %33, %43                       ; <float> [#uses=1]
+  %48 = fadd float %46, %47                       ; <float> [#uses=2]
+  %49 = load float* %9, align 4                   ; <float> [#uses=2]
+  %50 = fsub float %30, %49                       ; <float> [#uses=1]
+  %51 = load float* %12, align 4                  ; <float> [#uses=3]
+  %52 = fsub float %32, %51                       ; <float> [#uses=2]
+  %53 = load float* undef, align 4                ; <float> [#uses=2]
+  %54 = load float* %24, align 4                  ; <float> [#uses=2]
+  %55 = fmul float %54, undef                     ; <float> [#uses=1]
+  %56 = fmul float undef, %52                     ; <float> [#uses=1]
+  %57 = fsub float %55, %56                       ; <float> [#uses=1]
+  %58 = fmul float undef, %52                     ; <float> [#uses=1]
+  %59 = fmul float %54, %50                       ; <float> [#uses=1]
+  %60 = fsub float %58, %59                       ; <float> [#uses=1]
+  %61 = fmul float %30, %57                       ; <float> [#uses=1]
+  %62 = fmul float %32, 0.000000e+00              ; <float> [#uses=1]
+  %63 = fadd float %61, %62                       ; <float> [#uses=1]
+  %64 = fmul float %34, %60                       ; <float> [#uses=1]
+  %65 = fadd float %63, %64                       ; <float> [#uses=2]
+  %66 = fcmp olt float %48, %65                   ; <i1> [#uses=1]
+  %67 = fsub float %49, 0.000000e+00              ; <float> [#uses=1]
+  %68 = fsub float %51, %31                       ; <float> [#uses=1]
+  %69 = fsub float %53, %33                       ; <float> [#uses=1]
+  %70 = fmul float undef, %67                     ; <float> [#uses=1]
+  %71 = load float* undef, align 4                ; <float> [#uses=2]
+  %72 = fmul float %71, %69                       ; <float> [#uses=1]
+  %73 = fsub float %70, %72                       ; <float> [#uses=1]
+  %74 = fmul float %71, %68                       ; <float> [#uses=1]
+  %75 = fsub float %74, 0.000000e+00              ; <float> [#uses=1]
+  %76 = fmul float %51, %73                       ; <float> [#uses=1]
+  %77 = fadd float undef, %76                     ; <float> [#uses=1]
+  %78 = fmul float %53, %75                       ; <float> [#uses=1]
+  %79 = fadd float %77, %78                       ; <float> [#uses=1]
+  %80 = select i1 %66, float %48, float %65       ; <float> [#uses=1]
+  %81 = select i1 undef, float %80, float %79     ; <float> [#uses=1]
+  %iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1]
+  %82 = fdiv float %81, %iftmp.164.0              ; <float> [#uses=1]
+  %iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1]
+  store float %iftmp.165.0, float* undef, align 4
+  br i1 false, label %bb4.i97, label %ccc.exit98
+
+bb4.i97:                                          ; preds = %bb3.i
+  br label %ccc.exit98
+
+ccc.exit98:                                       ; preds = %bb4.i97, %bb3.i
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
new file mode 100644
index 0000000..832ff4f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+; rdar://7384107
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+  %0 = load %bar** undef, align 4                 ; <%bar*> [#uses=2]
+  br i1 false, label %bb85, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %bb2.i, %bb
+  %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+  %2 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
+  %5 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
+  %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+  %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+  %8 = fsub float undef, undef                    ; <float> [#uses=1]
+  %9 = fmul float 0.000000e+00, %8                ; <float> [#uses=1]
+  %10 = fmul float %5, 0.000000e+00               ; <float> [#uses=1]
+  %11 = fsub float %9, %10                        ; <float> [#uses=3]
+  %12 = fmul float %2, 0.000000e+00               ; <float> [#uses=1]
+  %13 = fmul float 0.000000e+00, undef            ; <float> [#uses=1]
+  %14 = fsub float %12, %13                       ; <float> [#uses=2]
+  store float %14, float* undef
+  %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
+  store float 0.000000e+00, float* %15
+  %16 = fmul float %11, %11                       ; <float> [#uses=1]
+  %17 = fadd float %16, 0.000000e+00              ; <float> [#uses=1]
+  %18 = fadd float %17, undef                     ; <float> [#uses=1]
+  %19 = call arm_aapcs_vfpcc  float @sqrtf(float %18) readnone ; <float> [#uses=2]
+  %20 = fcmp ogt float %19, 0x3F1A36E2E0000000    ; <i1> [#uses=1]
+  %21 = load float* %1, align 4                   ; <float> [#uses=2]
+  %22 = load float* %3, align 4                   ; <float> [#uses=2]
+  %23 = load float* undef, align 4                ; <float> [#uses=2]
+  %24 = load float* %4, align 4                   ; <float> [#uses=2]
+  %25 = fsub float %23, %24                       ; <float> [#uses=2]
+  %26 = fmul float 0.000000e+00, %25              ; <float> [#uses=1]
+  %27 = fsub float %26, undef                     ; <float> [#uses=1]
+  %28 = fmul float %14, 0.000000e+00              ; <float> [#uses=1]
+  %29 = fmul float %11, %25                       ; <float> [#uses=1]
+  %30 = fsub float %28, %29                       ; <float> [#uses=1]
+  %31 = fsub float undef, 0.000000e+00            ; <float> [#uses=1]
+  %32 = fmul float %21, %27                       ; <float> [#uses=1]
+  %33 = fmul float undef, %30                     ; <float> [#uses=1]
+  %34 = fadd float %32, %33                       ; <float> [#uses=1]
+  %35 = fmul float %23, %31                       ; <float> [#uses=1]
+  %36 = fadd float %34, %35                       ; <float> [#uses=1]
+  %37 = load float* %6, align 4                   ; <float> [#uses=2]
+  %38 = load float* %7, align 4                   ; <float> [#uses=2]
+  %39 = fsub float %22, %38                       ; <float> [#uses=2]
+  %40 = load float* undef, align 4                ; <float> [#uses=1]
+  %41 = load float* null, align 4                 ; <float> [#uses=2]
+  %42 = fmul float %41, undef                     ; <float> [#uses=1]
+  %43 = fmul float undef, %39                     ; <float> [#uses=1]
+  %44 = fsub float %42, %43                       ; <float> [#uses=1]
+  %45 = fmul float undef, %39                     ; <float> [#uses=1]
+  %46 = fmul float %41, 0.000000e+00              ; <float> [#uses=1]
+  %47 = fsub float %45, %46                       ; <float> [#uses=1]
+  %48 = fmul float 0.000000e+00, %44              ; <float> [#uses=1]
+  %49 = fmul float %22, undef                     ; <float> [#uses=1]
+  %50 = fadd float %48, %49                       ; <float> [#uses=1]
+  %51 = fmul float %24, %47                       ; <float> [#uses=1]
+  %52 = fadd float %50, %51                       ; <float> [#uses=1]
+  %53 = fsub float %37, %21                       ; <float> [#uses=2]
+  %54 = fmul float undef, undef                   ; <float> [#uses=1]
+  %55 = fmul float undef, undef                   ; <float> [#uses=1]
+  %56 = fsub float %54, %55                       ; <float> [#uses=1]
+  %57 = fmul float undef, %53                     ; <float> [#uses=1]
+  %58 = load float* undef, align 4                ; <float> [#uses=2]
+  %59 = fmul float %58, undef                     ; <float> [#uses=1]
+  %60 = fsub float %57, %59                       ; <float> [#uses=1]
+  %61 = fmul float %58, undef                     ; <float> [#uses=1]
+  %62 = fmul float undef, %53                     ; <float> [#uses=1]
+  %63 = fsub float %61, %62                       ; <float> [#uses=1]
+  %64 = fmul float %37, %56                       ; <float> [#uses=1]
+  %65 = fmul float %38, %60                       ; <float> [#uses=1]
+  %66 = fadd float %64, %65                       ; <float> [#uses=1]
+  %67 = fmul float %40, %63                       ; <float> [#uses=1]
+  %68 = fadd float %66, %67                       ; <float> [#uses=1]
+  %69 = select i1 undef, float %36, float %52     ; <float> [#uses=1]
+  %70 = select i1 undef, float %69, float %68     ; <float> [#uses=1]
+  %iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1]
+  %71 = fdiv float %70, %iftmp.164.0              ; <float> [#uses=1]
+  store float %71, float* null, align 4
+  %72 = icmp eq %bar* null, %0                    ; <i1> [#uses=1]
+  br i1 %72, label %bb4.i97, label %ccc.exit98
+
+bb4.i97:                                          ; preds = %bb3.i
+  %73 = load %bar** undef, align 4                ; <%bar*> [#uses=0]
+  br label %ccc.exit98
+
+ccc.exit98:                                       ; preds = %bb4.i97, %bb3.i
+  ret %bar* null
+
+bb85:                                             ; preds = %entry
+  ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index d8019a0..062133e 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s
 
 define float @f(float %z, double %a, float %b) {
-; CHECK: fcpys s0, s1
+; CHECK: vmov.f32 s0, s1
         %tmp = call float @g(float %b)
         ret float %tmp
 }
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index 5f3ed1d..fac2bc5 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep fcmpes
+; RUN:   grep vcmpe.f32
 
 define void @test3(float* %glob, i32 %X) {
 entry:
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 5690a01..46f136b 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index a01f868..1426a2d 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index bf7c305..a6d7410 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm | grep bic | count 2
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep fneg | count 2
+; RUN:   grep vneg | count 2
 
 define float @test1(float %x, double %y) {
 	%tmp = fpext float %x to double
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 2af250d..45803f6 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index ebf1d84..6db2385 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64
 
 define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index 5c31ea6..57efa82 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index c6e6d40..31b5c52 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index cb5dade..735263c 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 7da443d..bc3d42d 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
 
 define float @test1(float* %a) {
 entry:
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 8fc13e7..724947e 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -4,14 +4,14 @@
 
 define float @test(float %acc, float %a, float %b) {
 entry:
-; VFP2: fnmacs
-; NEON: fnmacs
+; VFP2: vmls.f32
+; NEON: vmls.f32
 
 ; NEONFP-NOT: vmls
-; NEONFP-NOT: fcpys
+; NEONFP-NOT: vmov.f32
 ; NEONFP:     vmul.f32
 ; NEONFP:     vsub.f32
-; NEONFP:     fmrs
+; NEONFP:     vmov
 
 	%0 = fmul float %a, %b
         %1 = fsub float %acc, %0
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 3ae437d..ad21882 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
 
 define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0 
+; CHECK: vnmla.f32 s2, s1, s0
 entry:
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
@@ -14,7 +14,7 @@ entry:
 }
 
 define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0 
+; CHECK: vnmla.f32 s2, s1, s0
 entry:
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 613b347..6d7bc05 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64
 
 
 define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 4e4ef72..8fbd45b 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -2,9 +2,9 @@
 
 define float @f(i32 %a) {
 ;CHECK: f:
-;CHECK: fmsr
-;CHECK-NEXT: fsitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.s32
+;CHECK-NEXT: vmov
 entry:
         %tmp = sitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
@@ -12,9 +12,9 @@ entry:
 
 define double @g(i32 %a) {
 ;CHECK: g:
-;CHECK: fmsr
-;CHECK-NEXT: fsitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.s32
+;CHECK-NEXT: vmov
 entry:
         %tmp = sitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
@@ -22,9 +22,9 @@ entry:
 
 define double @uint_to_double(i32 %a) {
 ;CHECK: uint_to_double:
-;CHECK: fmsr
-;CHECK-NEXT: fuitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.u32
+;CHECK-NEXT: vmov
 entry:
         %tmp = uitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
@@ -32,9 +32,9 @@ entry:
 
 define float @uint_to_float(i32 %a) {
 ;CHECK: uint_to_float:
-;CHECK: fmsr
-;CHECK-NEXT: fuitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.u32
+;CHECK-NEXT: vmov
 entry:
         %tmp = uitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
@@ -42,8 +42,8 @@ entry:
 
 define double @h(double* %v) {
 ;CHECK: h:
-;CHECK: fldd
-;CHECK-NEXT: fmrrd
+;CHECK: vldr.64 
+;CHECK-NEXT: vmov
 entry:
         %tmp = load double* %v          ; <double> [#uses=1]
         ret double %tmp
@@ -58,13 +58,13 @@ entry:
 
 define double @f2(double %a) {
 ;CHECK: f2:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
         ret double %a
 }
 
 define void @f3() {
 ;CHECK: f3:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
 ;CHECK: f4
 entry:
         %tmp = call double @f5( )               ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 9ce2ac5..2adac78 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -6,7 +6,7 @@
 
 define i32 @test1(float %a, float %b) {
 ; VFP2: test1:
-; VFP2: ftosizs s0, s0
+; VFP2: vcvt.s32.f32 s0, s0
 ; NEON: test1:
 ; NEON: vcvt.s32.f32 d0, d0
 entry:
@@ -17,7 +17,7 @@ entry:
 
 define i32 @test2(float %a, float %b) {
 ; VFP2: test2:
-; VFP2: ftouizs s0, s0
+; VFP2: vcvt.u32.f32 s0, s0
 ; NEON: test2:
 ; NEON: vcvt.u32.f32 d0, d0
 entry:
@@ -28,7 +28,7 @@ entry:
 
 define float @test3(i32 %a, i32 %b) {
 ; VFP2: test3:
-; VFP2: fuitos s0, s0
+; VFP2: vcvt.f32.u32 s0, s0
 ; NEON: test3:
 ; NEON: vcvt.f32.u32 d0, d0
 entry:
@@ -39,7 +39,7 @@ entry:
 
 define float @test4(i32 %a, i32 %b) {
 ; VFP2: test4:
-; VFP2: fsitos s0, s0
+; VFP2: vcvt.f32.s32 s0, s0
 ; NEON: test4:
 ; NEON: vcvt.f32.s32 d0, d0
 entry:
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index ebeeb18..ce6d6b2 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -2,7 +2,7 @@
 
 define float @f1(float %a, float %b) {
 ;CHECK: f1:
-;CHECK: fadds
+;CHECK: vadd.f32
 entry:
 	%tmp = fadd float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -10,7 +10,7 @@ entry:
 
 define double @f2(double %a, double %b) {
 ;CHECK: f2:
-;CHECK: faddd
+;CHECK: vadd.f64
 entry:
 	%tmp = fadd double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -18,7 +18,7 @@ entry:
 
 define float @f3(float %a, float %b) {
 ;CHECK: f3:
-;CHECK: fmuls
+;CHECK: vmul.f32
 entry:
 	%tmp = fmul float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -26,7 +26,7 @@ entry:
 
 define double @f4(double %a, double %b) {
 ;CHECK: f4:
-;CHECK: fmuld
+;CHECK: vmul.f64
 entry:
 	%tmp = fmul double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -34,7 +34,7 @@ entry:
 
 define float @f5(float %a, float %b) {
 ;CHECK: f5:
-;CHECK: fsubs
+;CHECK: vsub.f32
 entry:
 	%tmp = fsub float %a, %b		; <float> [#uses=1]
 	ret float %tmp
@@ -42,7 +42,7 @@ entry:
 
 define double @f6(double %a, double %b) {
 ;CHECK: f6:
-;CHECK: fsubd
+;CHECK: vsub.f64
 entry:
 	%tmp = fsub double %a, %b		; <double> [#uses=1]
 	ret double %tmp
@@ -58,7 +58,7 @@ entry:
 
 define double @f8(double %a) {
 ;CHECK: f8:
-;CHECK: fnegd
+;CHECK: vneg.f64
 entry:
 	%tmp1 = fsub double -0.000000e+00, %a		; <double> [#uses=1]
 	ret double %tmp1
@@ -66,7 +66,7 @@ entry:
 
 define float @f9(float %a, float %b) {
 ;CHECK: f9:
-;CHECK: fdivs
+;CHECK: vdiv.f32
 entry:
 	%tmp1 = fdiv float %a, %b		; <float> [#uses=1]
 	ret float %tmp1
@@ -74,7 +74,7 @@ entry:
 
 define double @f10(double %a, double %b) {
 ;CHECK: f10:
-;CHECK: fdivd
+;CHECK: vdiv.f64
 entry:
 	%tmp1 = fdiv double %a, %b		; <double> [#uses=1]
 	ret double %tmp1
@@ -92,7 +92,7 @@ declare float @fabsf(float)
 
 define double @f12(double %a) {
 ;CHECK: f12:
-;CHECK: fabsd
+;CHECK: vabs.f64
 entry:
 	%tmp1 = call double @fabs( double %a )		; <double> [#uses=1]
 	ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index 2c9591c..260ec49 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(float %a) {
 ;CHECK: f1:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movmi
 entry:
         %tmp = fcmp olt float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -12,7 +12,7 @@ entry:
 
 define i32 @f2(float %a) {
 ;CHECK: f2:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: moveq
 entry:
         %tmp = fcmp oeq float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -22,7 +22,7 @@ entry:
 
 define i32 @f3(float %a) {
 ;CHECK: f3:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movgt
 entry:
         %tmp = fcmp ogt float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -32,7 +32,7 @@ entry:
 
 define i32 @f4(float %a) {
 ;CHECK: f4:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movge
 entry:
         %tmp = fcmp oge float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
 
 define i32 @f5(float %a) {
 ;CHECK: f5:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movls
 entry:
         %tmp = fcmp ole float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -52,7 +52,7 @@ entry:
 
 define i32 @f6(float %a) {
 ;CHECK: f6:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
 ;CHECK: movne
 entry:
         %tmp = fcmp une float %a, 1.000000e+00          ; <i1> [#uses=1]
@@ -62,7 +62,7 @@ entry:
 
 define i32 @g1(double %a) {
 ;CHECK: g1:
-;CHECK: fcmped
+;CHECK: vcmpe.f64
 ;CHECK: movmi
 entry:
         %tmp = fcmp olt double %a, 1.000000e+00         ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index ee3c338..bf197a4 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -3,7 +3,7 @@
 
 define float @f1(double %x) {
 ;CHECK-VFP: f1:
-;CHECK-VFP: fcvtsd
+;CHECK-VFP: vcvt.f32.f64
 ;CHECK: f1:
 ;CHECK: truncdfsf2
 entry:
@@ -13,7 +13,7 @@ entry:
 
 define double @f2(float %x) {
 ;CHECK-VFP: f2:
-;CHECK-VFP: fcvtds
+;CHECK-VFP: vcvt.f64.f32
 ;CHECK: f2:
 ;CHECK: extendsfdf2
 entry:
@@ -23,7 +23,7 @@ entry:
 
 define i32 @f3(float %x) {
 ;CHECK-VFP: f3:
-;CHECK-VFP: ftosizs
+;CHECK-VFP: vcvt.s32.f32
 ;CHECK: f3:
 ;CHECK: fixsfsi
 entry:
@@ -33,7 +33,7 @@ entry:
 
 define i32 @f4(float %x) {
 ;CHECK-VFP: f4:
-;CHECK-VFP: ftouizs
+;CHECK-VFP: vcvt.u32.f32
 ;CHECK: f4:
 ;CHECK: fixunssfsi
 entry:
@@ -43,7 +43,7 @@ entry:
 
 define i32 @f5(double %x) {
 ;CHECK-VFP: f5:
-;CHECK-VFP: ftosizd
+;CHECK-VFP: vcvt.s32.f64
 ;CHECK: f5:
 ;CHECK: fixdfsi
 entry:
@@ -53,7 +53,7 @@ entry:
 
 define i32 @f6(double %x) {
 ;CHECK-VFP: f6:
-;CHECK-VFP: ftouizd
+;CHECK-VFP: vcvt.u32.f64
 ;CHECK: f6:
 ;CHECK: fixunsdfsi
 entry:
@@ -63,7 +63,7 @@ entry:
 
 define float @f7(i32 %a) {
 ;CHECK-VFP: f7:
-;CHECK-VFP: fsitos
+;CHECK-VFP: vcvt.f32.s32
 ;CHECK: f7:
 ;CHECK: floatsisf
 entry:
@@ -73,7 +73,7 @@ entry:
 
 define double @f8(i32 %a) {
 ;CHECK-VFP: f8:
-;CHECK-VFP: fsitod
+;CHECK-VFP: vcvt.f64.s32
 ;CHECK: f8:
 ;CHECK: floatsidf
 entry:
@@ -83,7 +83,7 @@ entry:
 
 define float @f9(i32 %a) {
 ;CHECK-VFP: f9:
-;CHECK-VFP: fuitos
+;CHECK-VFP: vcvt.f32.u32
 ;CHECK: f9:
 ;CHECK: floatunsisf
 entry:
@@ -93,7 +93,7 @@ entry:
 
 define double @f10(i32 %a) {
 ;CHECK-VFP: f10:
-;CHECK-VFP: fuitod
+;CHECK-VFP: vcvt.f64.u32
 ;CHECK: f10:
 ;CHECK: floatunsidf
 entry:
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 0822fbf..c3cff18 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
 
 define float @f2(float* %v, float %u) {
 ; CHECK: f2:
-; CHECK: flds{{.*}}[
+; CHECK: vldr.32{{.*}}[
         %tmp = load float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
 
 define void @f3(float %a, float %b, float* %v) {
 ; CHECK: f3:
-; CHECK: fsts{{.*}}[
+; CHECK: vstr.32{{.*}}[
         %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
         ret void
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 0d270b0..4cacc5d 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep -E {vmov\\W*r\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
 
 @i = weak global i32 0		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index 060dd46..f84ccdd 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 8ed58bd..886c0d5 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -40,14 +40,14 @@ define i32 @test1() {
 
 ; DarwinPIC: _test1:
 ; DarwinPIC: 	ldr r0, LCPI1_0
-; DarwinPIC: LPC0:
+; DarwinPIC: LPC1_0:
 ; DarwinPIC:    ldr r0, [pc, +r0]
 ; DarwinPIC:    ldr r0, [r0]
 ; DarwinPIC:    bx lr
 
 ; DarwinPIC: 	.align	2
 ; DarwinPIC: LCPI1_0:
-; DarwinPIC: 	.long	L_G$non_lazy_ptr-(LPC0+8)
+; DarwinPIC: 	.long	L_G$non_lazy_ptr-(LPC1_0+8)
 
 ; DarwinPIC: 	.section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
 ; DarwinPIC:	.align	2
@@ -61,7 +61,7 @@ define i32 @test1() {
 ; LinuxPIC: 	ldr r0, .LCPI1_0
 ; LinuxPIC: 	ldr r1, .LCPI1_1
 	
-; LinuxPIC: .LPC0:
+; LinuxPIC: .LPC1_0:
 ; LinuxPIC: 	add r0, pc, r0
 ; LinuxPIC: 	ldr r0, [r1, +r0]
 ; LinuxPIC: 	ldr r0, [r0]
@@ -69,7 +69,7 @@ define i32 @test1() {
 
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI1_0:
-; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC0+8)
+; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC1_0+8)
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI1_1:
 ; LinuxPIC:     .long	G(GOT)
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index e9145ac..623f2cb 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -11,7 +11,7 @@ entry:
 
 define void @t1(i32 %a, i32 %b) {
 ; CHECK: t1:
-; CHECK: ldmltfd sp!, {r7, pc}
+; CHECK: ldmfdlt sp!, {r7, pc}
 entry:
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 5824115..d7fcf7d 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmhi | count 1
+; RUN:   grep ldmfdhi | count 1
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index f9cf88f..c60ad93 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep moveq | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmeq | count 1
+; RUN:   grep ldmfdeq | count 1
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 6cb8e7b..a7da834 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmne | count 1
+; RUN:   grep ldmfdne | count 1
 
 	%struct.SString = type { i8*, i32, i32 }
 
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index cf4a1ab..8b56f13 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -55,6 +55,6 @@ L1:                                               ; preds = %L2, %bb2
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
 }
-; ARM: .long L_foo_L5-(LPC{{.*}}+8)
-; THUMB: .long L_foo_L5-(LPC{{.*}}+4)
-; THUMB2: .long L_foo_L5
+; ARM: .long LBA4__foo__L5-(LPC{{.*}}+8)
+; THUMB: .long LBA4__foo__L5-(LPC{{.*}}+4)
+; THUMB2: .long LBA4__foo__L5
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 2796dec..c78872a 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
 
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 547bab7..130277b 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
 ; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd  | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov  | count 2
 
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 85c8b5b..29c55c6 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -60,7 +60,7 @@ define double @f7(double %a, double %b) {
 ;CHECK: movlt
 ;CHECK: movlt
 ;CHECK-VFP: f7:
-;CHECK-VFP: fcpydmi
+;CHECK-VFP: vmovmi
     %tmp = fcmp olt double %a, 1.234e+00
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index f4b27a7..5ad7ecc 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
 
 define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: vstmia sp
-; CHECK: vldmia sp
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
 entry:
   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 50000e31..44a44af 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -15,11 +15,11 @@ declare double @fabs(double)
 define void @test_abs(float* %P, double* %D) {
 ;CHECK: test_abs:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: fabss
+;CHECK: vabs.f32
 	%b = call float @fabsf( float %a )		; <float> [#uses=1]
 	store float %b, float* %P
 	%A = load double* %D		; <double> [#uses=1]
-;CHECK: fabsd
+;CHECK: vabs.f64
 	%B = call double @fabs( double %A )		; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
@@ -39,10 +39,10 @@ define void @test_add(float* %P, double* %D) {
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: fcvtds
+;CHECK: vcvt.f64.f32
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
-;CHECK: fcvtsd
+;CHECK: vcvt.f32.f64
 	%B = fptrunc double %A to float		; <float> [#uses=1]
 	store double %b, double* %D
 	store float %B, float* %P
@@ -54,7 +54,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 	%a1 = load float* %P1		; <float> [#uses=1]
 	%a2 = load float* %P2		; <float> [#uses=1]
 	%a3 = load float* %P3		; <float> [#uses=1]
-;CHECK: fmscs
+;CHECK: vnmls.f32
 	%X = fmul float %a1, %a2		; <float> [#uses=1]
 	%Y = fsub float %X, %a3		; <float> [#uses=1]
 	store float %Y, float* %P1
@@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 define i32 @test_ftoi(float* %P1) {
 ;CHECK: test_ftoi:
 	%a1 = load float* %P1		; <float> [#uses=1]
-;CHECK: ftosizs
+;CHECK: vcvt.s32.f32
 	%b1 = fptosi float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) {
 define i32 @test_ftou(float* %P1) {
 ;CHECK: test_ftou:
 	%a1 = load float* %P1		; <float> [#uses=1]
-;CHECK: ftouizs
+;CHECK: vcvt.u32.f32
 	%b1 = fptoui float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) {
 define i32 @test_dtoi(double* %P1) {
 ;CHECK: test_dtoi:
 	%a1 = load double* %P1		; <double> [#uses=1]
-;CHECK: ftosizd
+;CHECK: vcvt.s32.f64
 	%b1 = fptosi double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
@@ -88,14 +88,14 @@ define i32 @test_dtoi(double* %P1) {
 define i32 @test_dtou(double* %P1) {
 ;CHECK: test_dtou:
 	%a1 = load double* %P1		; <double> [#uses=1]
-;CHECK: ftouizd
+;CHECK: vcvt.u32.f64
 	%b1 = fptoui double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define void @test_utod(double* %P1, i32 %X) {
 ;CHECK: test_utod:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
 	%b1 = uitofp i32 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
@@ -103,7 +103,7 @@ define void @test_utod(double* %P1, i32 %X) {
 
 define void @test_utod2(double* %P1, i8 %X) {
 ;CHECK: test_utod2:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
 	%b1 = uitofp i8 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
@@ -141,7 +141,7 @@ define void @test_cmpfp0(float* %glob, i32 %X) {
 ;CHECK: test_cmpfp0:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=1]
-;CHECK: fcmpezs
+;CHECK: vcmpe.f32
 	%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
 
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index f0df798..5dd87d6 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -204,8 +204,8 @@ define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
 
 define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
 ;CHECK: test_vset_lanef32:
-;CHECK: fcpys
-;CHECK: fcpys
+;CHECK: vmov.f32
+;CHECK: vmov.f32
 entry:
   %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
   ret <2 x float> %0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index ed69f97..e4368d6 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -134,6 +134,26 @@ define <2 x i64> @v_movQi64() nounwind {
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
+; Check for correct assembler printing for immediate values.
+%struct.int8x8_t = type { <8 x i8> }
+define arm_apcscc void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupn128:
+;CHECK: vmov.i8 d0, #0x80
+  %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+  store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
+  ret void
+}
+
+define arm_apcscc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupnneg75:
+;CHECK: vmov.i8 d0, #0xB5
+  %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+  store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
+  ret void
+}
+
 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
 ;CHECK: vmovls8:
 ;CHECK: vmovl.s8
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
new file mode 100644
index 0000000..a51c75d
--- /dev/null
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s
+; PR5495
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+%"struct.std::__ctype_abstract_base<wchar_t>" = type { %"struct.std::locale::facet" }
+%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__ctype_abstract_base<wchar_t>"*, %"struct.std::__ctype_abstract_base<wchar_t>"* }
+%"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
+%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
+%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
+%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+%"struct.std::ios_base::_Words" = type { i8*, i32 }
+%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+%union..0._15 = type { i32 }
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i8* @__cxa_begin_catch(i8*) nounwind
+
+declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
+
+define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"* %__in, i8* nocapture %__s) {
+entry:
+  %0 = invoke %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"* undef)
+          to label %invcont8 unwind label %lpad74 ; <%"struct.std::ctype<char>"*> [#uses=0]
+
+invcont8:                                         ; preds = %entry
+  %1 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+          to label %bb26.preheader unwind label %lpad ; <i32> [#uses=0]
+
+bb26.preheader:                                   ; preds = %invcont8
+  br label %invcont38
+
+bb1.i100:                                         ; preds = %invcont38
+  %2 = add nsw i32 1, %__extracted.0  ; <i32> [#uses=3]
+  br i1 undef, label %bb.i97, label %bb1.i
+
+bb.i97:                                           ; preds = %bb1.i100
+  br label %invcont38
+
+bb1.i:                                            ; preds = %bb1.i100
+  %3 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+          to label %invcont38 unwind label %lpad ; <i32> [#uses=0]
+
+invcont24:                                        ; preds = %invcont38
+  %4 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+          to label %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i unwind label %lpad ; <i32> [#uses=0]
+
+_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i: ; preds = %invcont24
+  br i1 undef, label %invcont25, label %bb.i93
+
+bb.i93:                                           ; preds = %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i
+  %5 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+          to label %invcont25 unwind label %lpad ; <i32> [#uses=0]
+
+invcont25:                                        ; preds = %bb.i93, %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i
+  br label %invcont38
+
+invcont38:                                        ; preds = %invcont25, %bb1.i, %bb.i97, %bb26.preheader
+  %__extracted.0 = phi i32 [ 0, %bb26.preheader ], [ undef, %invcont25 ], [ %2, %bb.i97 ], [ %2, %bb1.i ] ; <i32> [#uses=1]
+  br i1 false, label %bb1.i100, label %invcont24
+
+lpad:                                             ; preds = %bb.i93, %invcont24, %bb1.i, %invcont8
+  %__extracted.1 = phi i32 [ 0, %invcont8 ], [ %2, %bb1.i ], [ undef, %bb.i93 ], [ undef, %invcont24 ] ; <i32> [#uses=0]
+  %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+  %6 = call i8* @__cxa_begin_catch(i8* %eh_ptr) nounwind ; <i8*> [#uses=0]
+  unreachable
+
+lpad74:                                           ; preds = %entry
+  unreachable
+}
diff --git a/test/CodeGen/Generic/switch-lower.ll b/test/CodeGen/Generic/switch-lower.ll
index eb240ed..1cefe82 100644
--- a/test/CodeGen/Generic/switch-lower.ll
+++ b/test/CodeGen/Generic/switch-lower.ll
@@ -1,8 +1,22 @@
 ; RUN: llc < %s
-; PR1197
 
 
-define void @exp_attr__expand_n_attribute_reference() {
+; PR5421
+define void @test1() {
+entry:
+  switch i128 undef, label %exit [
+    i128 55340232221128654848, label %exit
+    i128 92233720368547758080, label %exit
+    i128 73786976294838206464, label %exit
+    i128 147573952589676412928, label %exit
+  ]
+exit:
+  unreachable
+}
+
+
+; PR1197
+define void @test2() {
 entry:
 	br i1 false, label %cond_next954, label %cond_true924
 
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index cc574c7..4d7d9b9 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
 target triple = "msp430-unknown-unknown"
 
-@"\010x0021" = common global i8 0, align 1        ; <i8*> [#uses=2]
+@"\010x0021" = external global i8, align 1        ; <i8*> [#uses=2]
 
 define zeroext i8 @foo(i8 zeroext %x) nounwind {
 entry:
diff --git a/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
new file mode 100644
index 0000000..94fe5c7
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-elf"
+
+@g_29 = common global i8 0, align 1               ; <i8*> [#uses=0]
+
+define signext i8 @foo(i8 signext %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+; CHECK: foo:
+; CHECK: call #__mulqi3
+  %mul = mul i8 %_si2, %_si1                      ; <i8> [#uses=1]
+  ret i8 %mul
+}
+
+define void @uint81(i16* nocapture %p_32) nounwind {
+entry:
+  %call = tail call i16 @bar(i8* bitcast (i8 (i8, i8)* @foo to i8*)) nounwind ; <i16> [#uses=0]
+  ret void
+}
+
+declare i16 @bar(i8*)
diff --git a/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
new file mode 100644
index 0000000..d232aea
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-elf"
+
+%struct.httpd_fs_file = type { i8*, i16 }
+%struct.psock = type { %struct.pt, %struct.pt, i8*, i8*, i8*, i16, i16, %struct.httpd_fs_file, i16, i8, i8 }
+%struct.pt = type { i16 }
+
+@foo = external global i8*
+
+define signext i8 @psock_readto(%struct.psock* nocapture %psock, i8 zeroext %c) nounwind {
+entry:
+  switch i16 undef, label %sw.epilog [
+    i16 0, label %sw.bb
+    i16 283, label %if.else.i
+  ]
+
+sw.bb:                                            ; preds = %entry
+  br label %do.body
+
+do.body:                                          ; preds = %while.cond36.i, %while.end.i, %sw.bb
+  br label %while.cond.i
+
+if.else.i:                                        ; preds = %entry
+  br i1 undef, label %psock_newdata.exit, label %if.else11.i
+
+if.else11.i:                                      ; preds = %if.else.i
+  ret i8 0
+
+psock_newdata.exit:                               ; preds = %if.else.i
+  ret i8 0
+
+while.cond.i:                                     ; preds = %while.body.i, %do.body
+  br i1 undef, label %while.end.i, label %while.body.i
+
+while.body.i:                                     ; preds = %while.cond.i
+  br i1 undef, label %do.end41, label %while.cond.i
+
+while.end.i:                                      ; preds = %while.cond.i
+  br i1 undef, label %do.body, label %while.cond36.i.preheader
+
+while.cond36.i.preheader:                         ; preds = %while.end.i
+  br label %while.cond36.i
+
+while.cond36.i:                                   ; preds = %while.body41.i, %while.cond36.i.preheader
+  br i1 undef, label %do.body, label %while.body41.i
+
+while.body41.i:                                   ; preds = %while.cond36.i
+  %tmp43.i = load i8** @foo                      ; <i8*> [#uses=2]
+  %tmp44.i = load i8* %tmp43.i                    ; <i8> [#uses=1]
+  %ptrincdec50.i = getelementptr inbounds i8* %tmp43.i, i16 1 ; <i8*> [#uses=1]
+  store i8* %ptrincdec50.i, i8** @foo
+  %cmp55.i = icmp eq i8 %tmp44.i, %c              ; <i1> [#uses=1]
+  br i1 %cmp55.i, label %do.end41, label %while.cond36.i
+
+do.end41:                                         ; preds = %while.body41.i, %while.body.i
+  br i1 undef, label %if.then46, label %sw.epilog
+
+if.then46:                                        ; preds = %do.end41
+  ret i8 0
+
+sw.epilog:                                        ; preds = %do.end41, %entry
+  ret i8 2
+}
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
new file mode 100644
index 0000000..3340494
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -0,0 +1,74 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define i16 @am1(i16 %x, i16* %a) nounwind {
+	%1 = load i16* %a
+	%2 = or i16 %1,%x
+	ret i16 %2
+}
+; CHECK: am1:
+; CHECK:		bis.w	0(r14), r15
+
+@foo = external global i16
+
+define i16 @am2(i16 %x) nounwind {
+	%1 = load i16* @foo
+	%2 = or i16 %1,%x
+	ret i16 %2
+}
+; CHECK: am2:
+; CHECK:		bis.w	&foo, r15
+
+@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
+
+define i8 @am3(i8 %x, i16 %n) nounwind {
+	%1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
+	%2 = load i8* %1
+	%3 = or i8 %2,%x
+	ret i8 %3
+}
+; CHECK: am3:
+; CHECK:		bis.b	&bar(r14), r15
+
+define i16 @am4(i16 %x) nounwind {
+	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%2 = or i16 %1,%x
+	ret i16 %2
+}
+; CHECK: am4:
+; CHECK:		bis.w	&32, r15
+
+define i16 @am5(i16 %x, i16* %a) nounwind {
+	%1 = getelementptr i16* %a, i16 2
+	%2 = load i16* %1
+	%3 = or i16 %2,%x
+	ret i16 %3
+}
+; CHECK: am5:
+; CHECK:		bis.w	4(r14), r15
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define i16 @am6(i16 %x) nounwind {
+	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	%2 = or i16 %1,%x
+	ret i16 %2
+}
+; CHECK: am6:
+; CHECK:		bis.w	&baz+2, r15
+
+%T = type { i16, [2 x i8] }
+@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
+
+define i8 @am7(i8 %x, i16 %n) nounwind {
+	%1 = getelementptr %T* @duh, i32 0, i32 1
+	%2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+	%3= load i8* %2
+	%4 = or i8 %3,%x
+	ret i8 %4
+}
+; CHECK: am7:
+; CHECK:		bis.b	&duh+2(r14), r15
+
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
new file mode 100644
index 0000000..ca79fb6
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define void @am1(i16* %a, i16 %x) nounwind {
+	%1 = load i16* %a
+	%2 = or i16 %x, %1
+	store i16 %2, i16* %a
+	ret void
+}
+; CHECK: am1:
+; CHECK:		bis.w	r14, 0(r15)
+
+@foo = external global i16
+
+define void @am2(i16 %x) nounwind {
+	%1 = load i16* @foo
+	%2 = or i16 %x, %1
+	store i16 %2, i16* @foo
+	ret void
+}
+; CHECK: am2:
+; CHECK:		bis.w	r15, &foo
+
+@bar = external global [2 x i8]
+
+define void @am3(i16 %i, i8 %x) nounwind {
+	%1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
+	%2 = load i8* %1
+	%3 = or i8 %x, %2
+	store i8 %3, i8* %1
+	ret void
+}
+; CHECK: am3:
+; CHECK:		bis.b	r14, &bar(r15)
+
+define void @am4(i16 %x) nounwind {
+	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%2 = or i16 %x, %1
+	volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+	ret void
+}
+; CHECK: am4:
+; CHECK:		bis.w	r15, &32
+
+define void @am5(i16* %a, i16 %x) readonly {
+	%1 = getelementptr inbounds i16* %a, i16 2
+	%2 = load i16* %1
+	%3 = or i16 %x, %2
+	store i16 %3, i16* %1
+	ret void
+}
+; CHECK: am5:
+; CHECK:		bis.w	r14, 4(r15)
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer
+
+define void @am6(i16 %x) nounwind {
+	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	%2 = or i16 %x, %1
+	store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1)
+	ret void
+}
+; CHECK: am6:
+; CHECK:		bis.w	r15, &baz+2
+
+%T = type { i16, [2 x i8] }
+@duh = external global %T
+
+define void @am7(i16 %n, i8 %x) nounwind {
+	%1 = getelementptr %T* @duh, i32 0, i32 1
+	%2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+	%3 = load i8* %2
+	%4 = or i8 %x, %3
+	store i8 %4, i8* %2
+	ret void
+}
+; CHECK: am7:
+; CHECK:		bis.b	r14, &duh+2(r15)
+
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
new file mode 100644
index 0000000..67cbb02
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -0,0 +1,67 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define i16 @am1(i16* %a) nounwind {
+	%1 = load i16* %a
+	ret i16 %1
+}
+; CHECK: am1:
+; CHECK:		mov.w	0(r15), r15
+
+@foo = external global i16
+
+define i16 @am2() nounwind {
+	%1 = load i16* @foo
+	ret i16 %1
+}
+; CHECK: am2:
+; CHECK:		mov.w	&foo, r15
+
+@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
+
+define i8 @am3(i16 %n) nounwind {
+	%1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
+	%2 = load i8* %1
+	ret i8 %2
+}
+; CHECK: am3:
+; CHECK:		mov.b	&bar(r15), r15
+
+define i16 @am4() nounwind {
+	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	ret i16 %1
+}
+; CHECK: am4:
+; CHECK:		mov.w	&32, r15
+
+define i16 @am5(i16* %a) nounwind {
+	%1 = getelementptr i16* %a, i16 2
+	%2 = load i16* %1
+	ret i16 %2
+}
+; CHECK: am5:
+; CHECK:		mov.w	4(r15), r15
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define i16 @am6() nounwind {
+	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	ret i16 %1
+}
+; CHECK: am6:
+; CHECK:		mov.w	&baz+2, r15
+
+%T = type { i16, [2 x i8] }
+@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
+
+define i8 @am7(i16 %n) nounwind {
+	%1 = getelementptr %T* @duh, i32 0, i32 1
+	%2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+	%3= load i8* %2
+	ret i8 %3
+}
+; CHECK: am7:
+; CHECK:		mov.b	&duh+2(r15), r15
+
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
new file mode 100644
index 0000000..b8155d3
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -0,0 +1,67 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define void @am1(i16* %a, i16 %b) nounwind {
+	store i16 %b, i16* %a
+	ret void
+}
+; CHECK: am1:
+; CHECK:		mov.w	r14, 0(r15)
+
+@foo = external global i16
+
+define void @am2(i16 %a) nounwind {
+	store i16 %a, i16* @foo
+	ret void
+}
+; CHECK: am2:
+; CHECK:		mov.w	r15, &foo
+
+@bar = external global [2 x i8]
+
+define void @am3(i16 %i, i8 %a) nounwind {
+	%1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
+	store i8 %a, i8* %1
+	ret void
+}
+; CHECK: am3:
+; CHECK:		mov.b	r14, &bar(r15)
+
+define void @am4(i16 %a) nounwind {
+	volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+	ret void
+}
+; CHECK: am4:
+; CHECK:		mov.w	r15, &32
+
+define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
+	%1 = getelementptr inbounds i16* %p, i16 2
+	store i16 %a, i16* %1
+	ret void
+}
+; CHECK: am5:
+; CHECK:		mov.w	r14, 4(r15)
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define void @am6(i16 %a) nounwind {
+	store i16 %a, i16* getelementptr (%S* @baz, i32 0, i32 1)
+	ret void
+}
+; CHECK: am6:
+; CHECK:		mov.w	r15, &baz+2
+
+%T = type { i16, [2 x i8] }
+@duh = external global %T
+
+define void @am7(i16 %n, i8 %a) nounwind {
+	%1 = getelementptr %T* @duh, i32 0, i32 1
+	%2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+	store i8 %a, i8* %2
+	ret void
+}
+; CHECK: am7:
+; CHECK:		mov.b	r14, &duh+2(r15)
+
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
index 53334aa..2613f01 100644
--- a/test/CodeGen/MSP430/Inst16mr.ll
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -37,6 +37,16 @@ define void @bis(i16 %a) nounwind {
 	ret void
 }
 
+define void @bic(i16 zeroext %m) nounwind {
+; CHECK: bic:
+; CHECK: bic.w   r15, &foo
+        %1 = xor i16 %m, -1
+        %2 = load i16* @foo
+        %3 = and i16 %2, %1
+        store i16 %3, i16* @foo
+        ret void
+}
+
 define void @xor(i16 %a) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.w	r15, &foo
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
index d0cb0d1..02e89c7 100644
--- a/test/CodeGen/MSP430/Inst16rm.ll
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -19,7 +19,6 @@ define i16 @and(i16 %a) nounwind {
 	ret i16 %2
 }
 
-
 define i16 @bis(i16 %a) nounwind {
 ; CHECK: bis:
 ; CHECK: bis.w	&foo, r15
@@ -28,6 +27,15 @@ define i16 @bis(i16 %a) nounwind {
 	ret i16 %2
 }
 
+define i16  @bic(i16 %a) nounwind {
+; CHECK: bic:
+; CHECK: bic.w	&foo, r15
+        %1 = load i16* @foo
+        %2 = xor i16 %1, -1
+        %3 = and i16 %a, %2
+        ret i16 %3
+}
+
 define i16 @xor(i16 %a) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.w	&foo, r15
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
index 6619c51..2f1ba5b 100644
--- a/test/CodeGen/MSP430/Inst16rr.ll
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -29,6 +29,14 @@ define i16 @bis(i16 %a, i16 %b) nounwind {
 	ret i16 %1
 }
 
+define i16 @bic(i16 %a, i16 %b) nounwind {
+; CHECK: bic:
+; CHECK: bic.w	r14, r15
+        %1 = xor i16 %b, -1
+        %2 = and i16 %a, %1
+        ret i16 %2
+}
+
 define i16 @xor(i16 %a, i16 %b) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.w	r14, r15
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
index 04c681e..428d1fa 100644
--- a/test/CodeGen/MSP430/Inst8mr.ll
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -37,6 +37,16 @@ define void @bis(i8 %a) nounwind {
 	ret void
 }
 
+define void @bic(i8 zeroext %m) nounwind {
+; CHECK: bic:
+; CHECK: bic.b   r15, &foo
+        %1 = xor i8 %m, -1
+        %2 = load i8* @foo
+        %3 = and i8 %2, %1
+        store i8 %3, i8* @foo
+        ret void
+}
+
 define void @xor(i8 %a) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.b	r15, &foo
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
index 62a5d4b..c062f04 100644
--- a/test/CodeGen/MSP430/Inst8rm.ll
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -19,7 +19,6 @@ define i8 @and(i8 %a) nounwind {
 	ret i8 %2
 }
 
-
 define i8 @bis(i8 %a) nounwind {
 ; CHECK: bis:
 ; CHECK: bis.b	&foo, r15
@@ -28,6 +27,15 @@ define i8 @bis(i8 %a) nounwind {
 	ret i8 %2
 }
 
+define i8  @bic(i8 %a) nounwind {
+; CHECK: bic:
+; CHECK: bic.b  &foo, r15
+        %1 = load i8* @foo
+        %2 = xor i8 %1, -1
+        %3 = and i8 %a, %2
+        ret i8 %3
+}
+
 define i8 @xor(i8 %a) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.b	&foo, r15
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 90ea945..74feaae 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -29,6 +29,14 @@ define i8 @bis(i8 %a, i8 %b) nounwind {
 	ret i8 %1
 }
 
+define i8 @bic(i8 %a, i8 %b) nounwind {
+; CHECK: bic:
+; CHECK: bic.b  r14, r15
+        %1 = xor i8 %b, -1
+        %2 = and i8 %a, %1
+        ret i8 %2
+}
+
 define i8 @xor(i8 %a, i8 %b) nounwind {
 ; CHECK: xor:
 ; CHECK: xor.w	r14, r15
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
index 2cc25a4..0e7886a 100644
--- a/test/CodeGen/MSP430/inline-asm.ll
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -20,6 +20,7 @@ define void @immmem() nounwind {
 }
 
 define void @mem() nounwind {
-        call void asm sideeffect "bic\09$0,r2", "m"(i16* @foo) nounwind
+        %fooval = load i16* @foo
+        call void asm sideeffect "bic\09$0,r2", "m"(i16 %fooval) nounwind
         ret void
 }
diff --git a/test/CodeGen/MSP430/postinc.ll b/test/CodeGen/MSP430/postinc.ll
new file mode 100644
index 0000000..8f01b83
--- /dev/null
+++ b/test/CodeGen/MSP430/postinc.ll
@@ -0,0 +1,114 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430"
+
+define zeroext i16 @add(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+  %cmp8 = icmp eq i16 %n, 0                       ; <i1> [#uses=1]
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+  %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
+; CHECK: add:
+; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}}
+  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %add = add i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
+  %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @sub(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+  %cmp8 = icmp eq i16 %n, 0                       ; <i1> [#uses=1]
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+  %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
+; CHECK: sub:
+; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}}
+  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %add = sub i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
+  %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @or(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+  %cmp8 = icmp eq i16 %n, 0                       ; <i1> [#uses=1]
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+  %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
+; CHECK: or:
+; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}}
+  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %add = or i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
+  %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @xor(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+  %cmp8 = icmp eq i16 %n, 0                       ; <i1> [#uses=1]
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+  %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
+; CHECK: xor:
+; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}}
+  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %add = xor i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
+  %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @and(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+  %cmp8 = icmp eq i16 %n, 0                       ; <i1> [#uses=1]
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+  %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
+; CHECK: and:
+; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}}
+  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %add = and i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
+  %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+  ret i16 %sum.0.lcssa
+}
+
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index c41d521..1244a3e 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,6 +1,7 @@
 ; Double return in abicall (default)
 ; RUN: llc < %s -march=mips
 ; PR2615
+; XFAIL: *
 
 define double @main(...) {
 entry:
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
new file mode 100644
index 0000000..f59639f
--- /dev/null
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-unknown-linux"
+
+define float @h() nounwind readnone {
+entry:
+; CHECK: lui $2, %hi($CPI1_0)
+; CHECK: lwc1 $f0, %lo($CPI1_0)($2)
+  ret float 0x400B333340000000
+}
diff --git a/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
new file mode 100644
index 0000000..2d9d16a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8
+
+define void @gcov_exit() nounwind {
+entry:
+  br i1 undef, label %return, label %bb.nph341
+
+bb.nph341:                                        ; preds = %entry
+  br label %bb25
+
+bb25:                                             ; preds = %read_fatal, %bb.nph341
+  br i1 undef, label %bb49.1, label %bb48
+
+bb48:                                             ; preds = %bb25
+  br label %bb49.1
+
+bb51:                                             ; preds = %bb48.4, %bb49.3
+  switch i32 undef, label %bb58 [
+    i32 0, label %rewrite
+    i32 1734567009, label %bb59
+  ]
+
+bb58:                                             ; preds = %bb51
+  br label %read_fatal
+
+bb59:                                             ; preds = %bb51
+  br i1 undef, label %bb60, label %bb3.i156
+
+bb3.i156:                                         ; preds = %bb59
+  br label %read_fatal
+
+bb60:                                             ; preds = %bb59
+  br i1 undef, label %bb78.preheader, label %rewrite
+
+bb78.preheader:                                   ; preds = %bb60
+  br i1 undef, label %bb62, label %bb80
+
+bb62:                                             ; preds = %bb78.preheader
+  br i1 undef, label %bb64, label %read_mismatch
+
+bb64:                                             ; preds = %bb62
+  br i1 undef, label %bb65, label %read_mismatch
+
+bb65:                                             ; preds = %bb64
+  br i1 undef, label %bb75, label %read_mismatch
+
+read_mismatch:                                    ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
+  br label %read_fatal
+
+bb71:                                             ; preds = %bb75
+  br i1 undef, label %bb72, label %read_mismatch
+
+bb72:                                             ; preds = %bb71
+  br i1 undef, label %bb73, label %read_mismatch
+
+bb73:                                             ; preds = %bb72
+  unreachable
+
+bb74:                                             ; preds = %bb75
+  br label %bb75
+
+bb75:                                             ; preds = %bb74, %bb65
+  br i1 undef, label %bb74, label %bb71
+
+bb80:                                             ; preds = %bb78.preheader
+  unreachable
+
+read_fatal:                                       ; preds = %read_mismatch, %bb3.i156, %bb58
+  br i1 undef, label %return, label %bb25
+
+rewrite:                                          ; preds = %bb60, %bb51
+  br i1 undef, label %bb94, label %bb119.preheader
+
+bb94:                                             ; preds = %rewrite
+  unreachable
+
+bb119.preheader:                                  ; preds = %rewrite
+  br i1 undef, label %read_mismatch, label %bb98
+
+bb98:                                             ; preds = %bb119.preheader
+  br label %read_mismatch
+
+return:                                           ; preds = %read_fatal, %entry
+  ret void
+
+bb49.1:                                           ; preds = %bb48, %bb25
+  br i1 undef, label %bb49.2, label %bb48.2
+
+bb49.2:                                           ; preds = %bb48.2, %bb49.1
+  br i1 undef, label %bb49.3, label %bb48.3
+
+bb48.2:                                           ; preds = %bb49.1
+  br label %bb49.2
+
+bb49.3:                                           ; preds = %bb48.3, %bb49.2
+  %c_ix.0.3 = phi i32 [ undef, %bb48.3 ], [ undef, %bb49.2 ] ; <i32> [#uses=1]
+  br i1 undef, label %bb51, label %bb48.4
+
+bb48.3:                                           ; preds = %bb49.2
+  store i64* undef, i64** undef, align 4
+  br label %bb49.3
+
+bb48.4:                                           ; preds = %bb49.3
+  %0 = getelementptr inbounds [5 x i64*]* undef, i32 0, i32 %c_ix.0.3 ; <i64**> [#uses=0]
+  br label %bb51
+}
diff --git a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
new file mode 100644
index 0000000..54f4b2e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.gcov_ctr_info = type { i32, i64*, void (i64*, i32)* }
+%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 }
+%struct.gcov_fn_info = type { i32, i32, [0 x i32] }
+%struct.gcov_info = type { i32, %struct.gcov_info*, i32, i8*, i32, %struct.gcov_fn_info*, i32, [0 x %struct.gcov_ctr_info] }
+%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] }
+
+@__gcov_var = external global %struct.__gcov_var  ; <%struct.__gcov_var*> [#uses=1]
+@__sF = external global [0 x %struct.FILE]        ; <[0 x %struct.FILE]*> [#uses=1]
+@.str = external constant [56 x i8], align 4      ; <[56 x i8]*> [#uses=1]
+@gcov_list = external global %struct.gcov_info*   ; <%struct.gcov_info**> [#uses=1]
+@.str7 = external constant [35 x i8], align 4     ; <[35 x i8]*> [#uses=1]
+@.str8 = external constant [9 x i8], align 4      ; <[9 x i8]*> [#uses=1]
+@.str9 = external constant [10 x i8], align 4     ; <[10 x i8]*> [#uses=1]
+@.str10 = external constant [36 x i8], align 4    ; <[36 x i8]*> [#uses=1]
+
+declare i32 @"\01_fprintf$LDBL128"(%struct.FILE*, i8*, ...) nounwind
+
+define void @gcov_exit() nounwind {
+entry:
+  %gi_ptr.0357 = load %struct.gcov_info** @gcov_list, align 4 ; <%struct.gcov_info*> [#uses=1]
+  %0 = alloca i8, i32 undef, align 1              ; <i8*> [#uses=3]
+  br i1 undef, label %return, label %bb.nph341
+
+bb.nph341:                                        ; preds = %entry
+  %object27 = bitcast %struct.gcov_summary* undef to i8* ; <i8*> [#uses=1]
+  br label %bb25
+
+bb25:                                             ; preds = %read_fatal, %bb.nph341
+  %gi_ptr.1329 = phi %struct.gcov_info* [ %gi_ptr.0357, %bb.nph341 ], [ undef, %read_fatal ] ; <%struct.gcov_info*> [#uses=1]
+  call void @llvm.memset.i32(i8* %object27, i8 0, i32 36, i32 8)
+  br i1 undef, label %bb49.1, label %bb48
+
+bb48:                                             ; preds = %bb25
+  br label %bb49.1
+
+bb51:                                             ; preds = %bb48.4, %bb49.3
+  switch i32 undef, label %bb58 [
+    i32 0, label %rewrite
+    i32 1734567009, label %bb59
+  ]
+
+bb58:                                             ; preds = %bb51
+  %1 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([35 x i8]* @.str7, i32 0, i32 0), i8* %0) nounwind ; <i32> [#uses=0]
+  br label %read_fatal
+
+bb59:                                             ; preds = %bb51
+  br i1 undef, label %bb60, label %bb3.i156
+
+bb3.i156:                                         ; preds = %bb59
+  store i8 52, i8* undef, align 1
+  store i8 42, i8* undef, align 1
+  %2 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([56 x i8]* @.str, i32 0, i32 0), i8* %0, i8* undef, i8* undef) nounwind ; <i32> [#uses=0]
+  br label %read_fatal
+
+bb60:                                             ; preds = %bb59
+  br i1 undef, label %bb78.preheader, label %rewrite
+
+bb78.preheader:                                   ; preds = %bb60
+  br i1 undef, label %bb62, label %bb80
+
+bb62:                                             ; preds = %bb78.preheader
+  br i1 undef, label %bb64, label %read_mismatch
+
+bb64:                                             ; preds = %bb62
+  br i1 undef, label %bb65, label %read_mismatch
+
+bb65:                                             ; preds = %bb64
+  br i1 undef, label %bb75, label %read_mismatch
+
+read_mismatch:                                    ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
+  %3 = icmp eq i32 undef, -1                      ; <i1> [#uses=1]
+  %iftmp.11.0 = select i1 %3, i8* getelementptr inbounds ([10 x i8]* @.str9, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str8, i32 0, i32 0) ; <i8*> [#uses=1]
+  %4 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([36 x i8]* @.str10, i32 0, i32 0), i8* %0, i8* %iftmp.11.0) nounwind ; <i32> [#uses=0]
+  br label %read_fatal
+
+bb71:                                             ; preds = %bb75
+  %5 = load i32* undef, align 4                   ; <i32> [#uses=1]
+  %6 = getelementptr inbounds %struct.gcov_info* %gi_ptr.1329, i32 0, i32 7, i32 undef, i32 2 ; <void (i64*, i32)**> [#uses=1]
+  %7 = load void (i64*, i32)** %6, align 4        ; <void (i64*, i32)*> [#uses=1]
+  %8 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
+  %9 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
+  %10 = icmp eq i32 %tmp386, %8                   ; <i1> [#uses=1]
+  br i1 %10, label %bb72, label %read_mismatch
+
+bb72:                                             ; preds = %bb71
+  %11 = icmp eq i32 undef, %9                     ; <i1> [#uses=1]
+  br i1 %11, label %bb73, label %read_mismatch
+
+bb73:                                             ; preds = %bb72
+  call void %7(i64* null, i32 %5) nounwind
+  unreachable
+
+bb74:                                             ; preds = %bb75
+  %12 = add i32 %13, 1                            ; <i32> [#uses=1]
+  br label %bb75
+
+bb75:                                             ; preds = %bb74, %bb65
+  %13 = phi i32 [ %12, %bb74 ], [ 0, %bb65 ]      ; <i32> [#uses=2]
+  %tmp386 = add i32 0, 27328512                   ; <i32> [#uses=1]
+  %14 = shl i32 1, %13                            ; <i32> [#uses=1]
+  %15 = load i32* undef, align 4                  ; <i32> [#uses=1]
+  %16 = and i32 %15, %14                          ; <i32> [#uses=1]
+  %17 = icmp eq i32 %16, 0                        ; <i1> [#uses=1]
+  br i1 %17, label %bb74, label %bb71
+
+bb80:                                             ; preds = %bb78.preheader
+  unreachable
+
+read_fatal:                                       ; preds = %read_mismatch, %bb3.i156, %bb58
+  br i1 undef, label %return, label %bb25
+
+rewrite:                                          ; preds = %bb60, %bb51
+  store i32 -1, i32* getelementptr inbounds (%struct.__gcov_var* @__gcov_var, i32 0, i32 6), align 4
+  br i1 undef, label %bb94, label %bb119.preheader
+
+bb94:                                             ; preds = %rewrite
+  unreachable
+
+bb119.preheader:                                  ; preds = %rewrite
+  br i1 undef, label %read_mismatch, label %bb98
+
+bb98:                                             ; preds = %bb119.preheader
+  br label %read_mismatch
+
+return:                                           ; preds = %read_fatal, %entry
+  ret void
+
+bb49.1:                                           ; preds = %bb48, %bb25
+  br i1 undef, label %bb49.2, label %bb48.2
+
+bb49.2:                                           ; preds = %bb48.2, %bb49.1
+  br i1 undef, label %bb49.3, label %bb48.3
+
+bb48.2:                                           ; preds = %bb49.1
+  br label %bb49.2
+
+bb49.3:                                           ; preds = %bb48.3, %bb49.2
+  br i1 undef, label %bb51, label %bb48.4
+
+bb48.3:                                           ; preds = %bb49.2
+  br label %bb49.3
+
+bb48.4:                                           ; preds = %bb49.3
+  br label %bb51
+}
+
+declare i32 @__gcov_read_unsigned() nounwind
+
+declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ee0a02b..1b302e41 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,12 +43,12 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; PIC: addis r4, r2, ha16(L_foo_L5-"L1$pb")
-; PIC: li r5, lo16(L_foo_L5-"L1$pb")
+; PIC: addis r4, r2, ha16(LBA4__foo__L5-"L1$pb")
+; PIC: li r5, lo16(LBA4__foo__L5-"L1$pb")
 ; PIC: add r4, r4, r5
 ; PIC: stw r4
-; STATIC: li r2, lo16(L_foo_L5)
-; STATIC: addis r2, r2, ha16(L_foo_L5)
+; STATIC: li r2, lo16(LBA4__foo__L5)
+; STATIC: addis r2, r2, ha16(LBA4__foo__L5)
 ; STATIC: stw r2
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll
new file mode 100644
index 0000000..581d010
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s
+
+define i32 @_Z4funci(i32 %a) ssp {
+; CHECK:       mflr r0
+; CHECK-NEXT:  stw r31, 20(r1)
+; CHECK-NEXT:  stw r0, 8(r1)
+; CHECK-NEXT:  stwu r1, -80(r1)
+; CHECK-NEXT: Llabel1:
+; CHECK-NEXT:  mr r31, r1
+; CHECK-NEXT: Llabel2:
+entry:
+  %a_addr = alloca i32                            ; <i32*> [#uses=2]
+  %retval = alloca i32                            ; <i32*> [#uses=2]
+  %0 = alloca i32                                 ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store i32 %a, i32* %a_addr
+  %1 = call i32 @_Z3barPi(i32* %a_addr)           ; <i32> [#uses=1]
+  store i32 %1, i32* %0, align 4
+  %2 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  store i32 %2, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval1 = load i32* %retval                    ; <i32> [#uses=1]
+  ret i32 %retval1
+}
+
+declare i32 @_Z3barPi(i32*)
diff --git a/test/CodeGen/PowerPC/vec_auto_constant.ll b/test/CodeGen/PowerPC/vec_auto_constant.ll
new file mode 100644
index 0000000..973f089
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_auto_constant.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+; Formerly produced .long, 7320806 (partial)
+; CHECK: .byte  22
+; CHECK: .byte  21
+; CHECK: .byte  20
+; CHECK: .byte  3
+; CHECK: .byte  25
+; CHECK: .byte  24
+; CHECK: .byte  23
+; CHECK: .byte  3
+; CHECK: .byte  28
+; CHECK: .byte  27
+; CHECK: .byte  26
+; CHECK: .byte  3
+; CHECK: .byte  31
+; CHECK: .byte  30
+; CHECK: .byte  29
+; CHECK: .byte  3
+@baz = common global <16 x i8> zeroinitializer    ; <<16 x i8>*> [#uses=1]
+
+define void @foo(<16 x i8> %x) nounwind ssp {
+entry:
+  %x_addr = alloca <16 x i8>                      ; <<16 x i8>*> [#uses=2]
+  %temp = alloca <16 x i8>                        ; <<16 x i8>*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store <16 x i8> %x, <16 x i8>* %x_addr
+  store <16 x i8> <i8 22, i8 21, i8 20, i8 3, i8 25, i8 24, i8 23, i8 3, i8 28, i8 27, i8 26, i8 3, i8 31, i8 30, i8 29, i8 3>, <16 x i8>* %temp, align 16
+  %0 = load <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
+  %1 = load <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
+  %tmp = add <16 x i8> %0, %1                     ; <<16 x i8>> [#uses=1]
+  store <16 x i8> %tmp, <16 x i8>* @baz, align 16
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_splat_constant.ll b/test/CodeGen/PowerPC/vec_splat_constant.ll
new file mode 100644
index 0000000..b227794
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_splat_constant.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+; Formerly incorrectly inserted vsldoi (endian confusion)
+
+@baz = common global <16 x i8> zeroinitializer    ; <<16 x i8>*> [#uses=1]
+
+define void @foo(<16 x i8> %x) nounwind ssp {
+entry:
+; CHECK: _foo:
+; CHECK-NOT: vsldoi
+  %x_addr = alloca <16 x i8>                      ; <<16 x i8>*> [#uses=2]
+  %temp = alloca <16 x i8>                        ; <<16 x i8>*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store <16 x i8> %x, <16 x i8>* %x_addr
+  store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16
+  %0 = load <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
+  %1 = load <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
+  %tmp = add <16 x i8> %0, %1                     ; <<16 x i8>> [#uses=1]
+  store <16 x i8> %tmp, <16 x i8>* @baz, align 16
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: blr
+}
diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/Thumb/machine-licm.ll
new file mode 100644
index 0000000..dae1412b
--- /dev/null
+++ b/test/CodeGen/Thumb/machine-licm.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
+; rdar://7353541
+; rdar://7354376
+
+; The generated code is no where near ideal. It's not recognizing the two
+; constantpool entries being loaded can be merged into one.
+
+@GV = external global i32                         ; <i32*> [#uses=2]
+
+define arm_apcscc void @t(i32* nocapture %vals, i32 %c) nounwind {
+entry:
+; CHECK: t:
+  %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+; CHECK: BB#1
+; CHECK: ldr.n r2, LCPI1_0
+; CHECK: add r2, pc
+; CHECK: ldr r{{[0-9]+}}, [r2]
+; CHECK: LBB1_2
+; CHECK: LCPI1_0:
+; CHECK-NOT: LCPI1_1:
+; CHECK: .section
+  %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ]    ; <i32> [#uses=1]
+  %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ]     ; <i32> [#uses=2]
+  %scevgep = getelementptr i32* %vals, i32 %i.03  ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = add nsw i32 %1, %2                         ; <i32> [#uses=2]
+  store i32 %3, i32* @GV, align 4
+  %4 = add i32 %i.03, 1                           ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %4, %c                  ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index e84e867..8d03b52 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -6,10 +6,8 @@
 
 define arm_apcscc void @t() nounwind {
 ; CHECK: t:
-; CHECK:      ittt eq
-; CHECK-NEXT: addeq
-; CHECK-NEXT: movweq
-; CHECK-NEXT: movteq
+; CHECK:      it eq
+; CHECK-NEXT: cmpeq
 entry:
   %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
   br i1 undef, label %land.rhs, label %lor.end
diff --git a/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
new file mode 100644
index 0000000..9f2e399
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+%struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i32, i16, i16, i8, i8 }
+%struct.SV = type { i8*, i32, i32 }
+
+declare arm_apcscc void @Perl_mg_set(%struct.SV*) nounwind
+
+define arm_apcscc %struct.OP* @Perl_pp_complement() nounwind {
+entry:
+  %0 = load %struct.SV** null, align 4            ; <%struct.SV*> [#uses=2]
+  br i1 undef, label %bb21, label %bb5
+
+bb5:                                              ; preds = %entry
+  br i1 undef, label %bb13, label %bb6
+
+bb6:                                              ; preds = %bb5
+  br i1 undef, label %bb8, label %bb7
+
+bb7:                                              ; preds = %bb6
+  %1 = getelementptr inbounds %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1]
+  %2 = load i8** %1, align 4                      ; <i8*> [#uses=1]
+  %3 = getelementptr inbounds i8* %2, i32 12      ; <i8*> [#uses=1]
+  %4 = bitcast i8* %3 to i32*                     ; <i32*> [#uses=1]
+  %5 = load i32* %4, align 4                      ; <i32> [#uses=1]
+  %storemerge5 = xor i32 %5, -1                   ; <i32> [#uses=1]
+  call arm_apcscc  void @Perl_sv_setiv(%struct.SV* undef, i32 %storemerge5) nounwind
+  %6 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  %8 = and i32 %7, 16384                          ; <i32> [#uses=1]
+  %9 = icmp eq i32 %8, 0                          ; <i1> [#uses=1]
+  br i1 %9, label %bb12, label %bb11
+
+bb8:                                              ; preds = %bb6
+  unreachable
+
+bb11:                                             ; preds = %bb7
+  call arm_apcscc  void @Perl_mg_set(%struct.SV* undef) nounwind
+  br label %bb12
+
+bb12:                                             ; preds = %bb11, %bb7
+  store %struct.SV* undef, %struct.SV** null, align 4
+  br label %bb44
+
+bb13:                                             ; preds = %bb5
+  %10 = call arm_apcscc  i32 @Perl_sv_2uv(%struct.SV* %0) nounwind ; <i32> [#uses=0]
+  br i1 undef, label %bb.i, label %bb1.i
+
+bb.i:                                             ; preds = %bb13
+  call arm_apcscc  void @Perl_sv_setiv(%struct.SV* undef, i32 undef) nounwind
+  br label %Perl_sv_setuv.exit
+
+bb1.i:                                            ; preds = %bb13
+  br label %Perl_sv_setuv.exit
+
+Perl_sv_setuv.exit:                               ; preds = %bb1.i, %bb.i
+  %11 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+  %12 = load i32* %11, align 4                    ; <i32> [#uses=1]
+  %13 = and i32 %12, 16384                        ; <i32> [#uses=1]
+  %14 = icmp eq i32 %13, 0                        ; <i1> [#uses=1]
+  br i1 %14, label %bb20, label %bb19
+
+bb19:                                             ; preds = %Perl_sv_setuv.exit
+  call arm_apcscc  void @Perl_mg_set(%struct.SV* undef) nounwind
+  br label %bb20
+
+bb20:                                             ; preds = %bb19, %Perl_sv_setuv.exit
+  store %struct.SV* undef, %struct.SV** null, align 4
+  br label %bb44
+
+bb21:                                             ; preds = %entry
+  br i1 undef, label %bb23, label %bb22
+
+bb22:                                             ; preds = %bb21
+  unreachable
+
+bb23:                                             ; preds = %bb21
+  unreachable
+
+bb44:                                             ; preds = %bb20, %bb12
+  ret %struct.OP* undef
+}
+
+declare arm_apcscc void @Perl_sv_setiv(%struct.SV*, i32) nounwind
+
+declare arm_apcscc i32 @Perl_sv_2uv(%struct.SV*) nounwind
diff --git a/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll b/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll
new file mode 100644
index 0000000..8a67bb1
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+; rdar://7394794
+
+define arm_apcscc void @lshift_double(i64 %l1, i64 %h1, i64 %count, i32 %prec, i64* nocapture %lv, i64* nocapture %hv, i32 %arith) nounwind {
+entry:
+  %..i = select i1 false, i64 0, i64 0            ; <i64> [#uses=1]
+  br i1 undef, label %bb11.i, label %bb6.i
+
+bb6.i:                                            ; preds = %entry
+  %0 = lshr i64 %h1, 0                            ; <i64> [#uses=1]
+  store i64 %0, i64* %hv, align 4
+  %1 = lshr i64 %l1, 0                            ; <i64> [#uses=1]
+  %2 = or i64 0, %1                               ; <i64> [#uses=1]
+  store i64 %2, i64* %lv, align 4
+  br label %bb11.i
+
+bb11.i:                                           ; preds = %bb6.i, %entry
+  store i64 %..i, i64* %lv, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 4320328..eefbae5 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 4
 
 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
 entry:
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 865b17b..18d507c 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -18,7 +18,7 @@ define void @test2() {
 define i32 @test3() {
 ; CHECK: test3:
 ; CHECK: sub.w sp, sp, #805306368
-; CHECK: sub sp, #4 * 4
+; CHECK: sub sp, #6 * 4
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 4fd4525..9286670 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -14,7 +14,7 @@ define i32 @test1() {
 
 ; PIC: _test1
 ; PIC: add r0, pc
-; PIC: .long L_G$non_lazy_ptr-(LPC0+4)
+; PIC: .long L_G$non_lazy_ptr-(LPC1_0+4)
 
 ; LINUX: test1
 ; LINUX: .long G(GOT)
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
new file mode 100644
index 0000000..7b1b57a
--- /dev/null
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
+; rdar://7387640
+
+; FIXME: We still need to rewrite array reference iv of stride -4 with loop
+; count iv of stride -1.
+
+@G = external global i32                          ; <i32*> [#uses=2]
+@array = external global i32*                     ; <i32**> [#uses=1]
+
+define arm_apcscc void @t() nounwind optsize {
+; CHECK: t:
+; CHECK: mov.w r2, #4000
+; CHECK: movw r3, #1001
+entry:
+  %.pre = load i32* @G, align 4                   ; <i32> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %entry
+; CHECK: LBB1_1:
+; CHECK: subs r3, #1
+; CHECK: cmp r3, #0
+; CHECK: sub.w r2, r2, #4
+  %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+  %tmp5 = sub i32 1000, %indvar                   ; <i32> [#uses=1]
+  %1 = load i32** @array, align 4                 ; <i32*> [#uses=1]
+  %scevgep = getelementptr i32* %1, i32 %tmp5     ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = add nsw i32 %2, %0                         ; <i32> [#uses=2]
+  store i32 %3, i32* @G, align 4
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, 1001      ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 64309c4..912939b 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
 ; rdar://7353541
+; rdar://7354376
 
 ; The generated code is no where near ideal. It's not recognizing the two
 ; constantpool entries being loaded can be merged into one.
@@ -15,8 +16,13 @@ entry:
 
 bb.nph:                                           ; preds = %entry
 ; CHECK: BB#1
-; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_0
-; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_1
+; CHECK: ldr.n r2, LCPI1_0
+; CHECK: add r2, pc
+; CHECK: ldr r{{[0-9]+}}, [r2]
+; CHECK: LBB1_2
+; CHECK: LCPI1_0:
+; CHECK-NOT: LCPI1_1:
+; CHECK: .section
   %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
   br label %bb
 
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 64587c1..0fc6899 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -20,7 +20,8 @@ bb7:                                              ; preds = %bb3
   br i1 %a, label %bb11, label %bb9
 
 bb9:                                              ; preds = %bb7
-; CHECK:      @ BB#2:
+; CHECK:      cmp r0, #0
+; CHECK-NEXT: cmp r0, #0
 ; CHECK-NEXT: cbnz
   %0 = tail call arm_apcscc  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index 1d45d3c..496158c 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -23,7 +23,7 @@ bb52:                                             ; preds = %newFuncRoot
 ; CHECK: movne
 ; CHECK: moveq
 ; CHECK: pop
-; CHECK-NEXT: LBB1_2:
+; CHECK-NEXT: LBB1_1:
   %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
   %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
   %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index 7d093ec..f5a56e5 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep tbb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | not grep tbb
 
 ; Do not use tbb / tbh if any destination is before the jumptable.
 ; rdar://7102917
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index b4274ad..44fa245 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -1,8 +1,12 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | count 3
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mvn | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep it  | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: t1
+; CHECK: mvn  r0, #-2147483648
+; CHECK: cmp r2, #10
+; CHECK: add.w r0, r1, r0
+; CHECK: it  gt
+; CHECK: movgt r0, r1
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
         %tmp3 = add i32 %tmp2, %b
@@ -10,6 +14,12 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: t2
+; CHECK: add.w r0, r1, #-2147483648
+; CHECK: cmp r2, #10
+; CHECK: it  gt
+; CHECK: movgt r0, r1
+
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 2147483648
         %tmp3 = add i32 %tmp2, %b
@@ -17,6 +27,11 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
 }
 
 define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; CHECK: t3
+; CHECK: sub.w r0, r1, #10
+; CHECK: cmp r2, #10
+; CHECK: it  gt
+; CHECK: movgt r0, r1
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 10
         %tmp3 = sub i32 %b, %tmp2
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 7746cd3..b106ced 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,22 +1,24 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsl
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsr
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep asr
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep mov
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_lsl
+; CHECK: add.w  r0, r0, r1, lsl #16
         %A = shl i32 %Y, 16
         %B = add i32 %X, %A
         ret i32 %B
 }
 
 define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_lsr
+; CHECK: add.w  r0, r0, r1, lsr #16
         %A = lshr i32 %Y, 16
         %B = add i32 %X, %A
         ret i32 %B
 }
 
 define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_asr
+; CHECK: add.w  r0, r0, r1, asr #16
         %A = ashr i32 %Y, 16
         %B = add i32 %X, %A
         ret i32 %B
@@ -24,6 +26,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
 
 ; i32 ror(n) = (x >> n) | (x << (32 - n))
 define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_ror
+; CHECK: add.w  r0, r0, r1, ror #16
         %A = lshr i32 %Y, 16
         %B = shl  i32 %Y, 16
         %C = or   i32 %B, %A
@@ -32,6 +36,10 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
 }
 
 define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
+; CHECK: t2ADDrs_noRegShift
+; CHECK: uxtb r2, r2
+; CHECK: lsls r1, r2
+; CHECK: add  r0, r1
         %shift.upgrd.1 = zext i8 %sh to i32
         %A = shl i32 %Y, %shift.upgrd.1
         %B = add i32 %X, %A
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index 66cc884..092ec27 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep smlabt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f3(i32 %a, i16 %x, i32 %y) {
+; CHECK: f3
+; CHECK: smlabt r0, r1, r2, r0
         %tmp = sext i16 %x to i32               ; <i32> [#uses=1]
         %tmp2 = ashr i32 %y, 16         ; <i32> [#uses=1]
         %tmp3 = mul i32 %tmp2, %tmp             ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index cdbf4ca..16ea85d 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,12 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep smulbt | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep smultt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 |  FileCheck %s
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
 @y = weak global i16 0          ; <i16*> [#uses=0]
 
 define i32 @f1(i32 %y) {
+; CHECK: f1
+; CHECK: smulbt r0, r1, r0
         %tmp = load i16* @x             ; <i16> [#uses=1]
         %tmp1 = add i16 %tmp, 2         ; <i16> [#uses=1]
         %tmp2 = sext i16 %tmp1 to i32           ; <i32> [#uses=1]
@@ -16,6 +15,8 @@ define i32 @f1(i32 %y) {
 }
 
 define i32 @f2(i32 %x, i32 %y) {
+; CHECK: f2
+; CHECK: smultt r0, r1, r0
         %tmp1 = ashr i32 %x, 16         ; <i32> [#uses=1]
         %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
         %tmp4 = mul i32 %tmp3, %tmp1            ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 0a7221c..aef167b 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
 
 define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: vstmia sp
-; CHECK: vldmia sp
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
 entry:
   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 6c804ee..9af960b 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep {str.*\\!} | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
+; CHECK: test1
+; CHECK: str  r1, [r0, #+16]!
         %B = load i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i32* %X, i32 4               ; <i32*> [#uses=2]
         store i32 %B, i32* %Y
@@ -10,6 +11,8 @@ define void @test1(i32* %X, i32* %A, i32** %dest) {
 }
 
 define i16* @test2(i16* %X, i32* %A) {
+; CHECK: test2
+; CHECK: strh r1, [r0, #+8]!
         %B = load i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i16* %X, i32 4               ; <i16*> [#uses=2]
         %tmp = trunc i32 %B to i16              ; <i16> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 33ed543..054d5df 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,16 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep sxtb | count 2
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep sxtb | grep ror | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep sxtab | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @test0(i8 %A) {
+; CHECK: test0
+; CHECK: sxtb r0, r0
         %B = sext i8 %A to i32
 	ret i32 %B
 }
 
 define i8 @test1(i32 %A) signext {
+; CHECK: test1
+; CHECK: sxtb.w r0, r0, ror #8
 	%B = lshr i32 %A, 8
 	%C = shl i32 %A, 24
 	%D = or i32 %B, %C
@@ -19,6 +18,9 @@ define i8 @test1(i32 %A) signext {
 }
 
 define i32 @test2(i32 %A, i32 %X) signext {
+; CHECK: test2
+; CHECK: lsrs r0, r0, #8
+; CHECK: sxtab  r0, r1, r0
 	%B = lshr i32 %A, 8
 	%C = shl i32 %A, 24
 	%D = or i32 %B, %C
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
index c5cb6f3..2cf1d6a 100644
--- a/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -2,8 +2,6 @@
 
 ; Thumb2 target should reorder the bb's in order to use tbb / tbh.
 
-; XFAIL: *
-
 	%struct.R_flstr = type { i32, i32, i8* }
 	%struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* }
 @_C_nextcmd = external global i32		; <i32*> [#uses=3]
@@ -18,7 +16,7 @@ declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
 
 define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
 ; CHECK: main:
-; CHECK: tbh
+; CHECK: tbb
 entry:
 	br label %bb42.i
 
@@ -26,7 +24,7 @@ bb1.i2:		; preds = %bb42.i
 	br label %bb40.i
 
 bb5.i:		; preds = %bb42.i
-	%0 = or i32 %_Y_flags.1, 32		; <i32> [#uses=1]
+	%0 = or i32 %argc, 32		; <i32> [#uses=1]
 	br label %bb40.i
 
 bb7.i:		; preds = %bb42.i
@@ -66,14 +64,10 @@ bb39.i:		; preds = %bb42.i
 	unreachable
 
 bb40.i:		; preds = %bb42.i, %bb5.i, %bb1.i2
-	%_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ]		; <i32> [#uses=1]
-	%_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ]		; <i1> [#uses=1]
 	br label %bb42.i
 
 bb42.i:		; preds = %bb40.i, %entry
-	%_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ]		; <i1> [#uses=2]
-	%_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ]		; <i32> [#uses=2]
-	switch i32 undef, label %bb39.i [
+	switch i32 %argc, label %bb39.i [
 		i32 67, label %bb33.i
 		i32 70, label %bb35.i
 		i32 77, label %bb37.i
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index c6867d9..0f122f2 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,34 +1,40 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1
+; CHECK: teq.w  r0, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp ne i32 %tmp, 0
     ret i1 %tmp1
 }
 
 define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2
+; CHECK: teq.w r0, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
 
 define i1 @f3(i32 %a, i32 %b) {
+; CHECK: f3
+; CHECK: teq.w  r0, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp ne i32 0, %tmp
     ret i1 %tmp1
 }
 
 define i1 @f4(i32 %a, i32 %b) {
+; CHECK: f4
+; CHECK: teq.w  r0, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
 
 define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6
+; CHECK: teq.w  r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -36,6 +42,8 @@ define i1 @f6(i32 %a, i32 %b) {
 }
 
 define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7
+; CHECK: teq.w  r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -43,6 +51,8 @@ define i1 @f7(i32 %a, i32 %b) {
 }
 
 define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8
+; CHECK: teq.w  r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -50,6 +60,8 @@ define i1 @f8(i32 %a, i32 %b) {
 }
 
 define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9
+; CHECK: teq.w  r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 37919dd..75e1d70 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,13 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtb | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtab | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxth | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i8 @test1(i32 %A.u) zeroext {
+; CHECK: test1
+; CHECK: uxtb r0, r0
     %B.u = trunc i32 %A.u to i8
     ret i8 %B.u
 }
 
 define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
+; CHECK: test2
+; CHECK: uxtab  r0, r0, r1
     %C.u = trunc i32 %B.u to i8
     %D.u = zext i8 %C.u to i32
     %E.u = add i32 %A.u, %D.u
@@ -15,6 +17,8 @@ define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
 }
 
 define i32 @test3(i32 %A.u) zeroext {
+; CHECK: test3
+; CHECK: uxth.w r0, r0, ror #8
     %B.u = lshr i32 %A.u, 8
     %C.u = shl i32 %A.u, 24
     %D.u = or i32 %B.u, %C.u
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 4022d95..4e23f53 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,36 +1,47 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep uxt | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @test1(i32 %x) {
+; CHECK: test1
+; CHECK: uxtb16.w  r0, r0
 	%tmp1 = and i32 %x, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
 define i32 @test2(i32 %x) {
+; CHECK: test2
+; CHECK: uxtb16.w  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test3(i32 %x) {
+; CHECK: test3
+; CHECK: uxtb16.w  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test4(i32 %x) {
+; CHECK: test4
+; CHECK: uxtb16.w  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp6 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp6
 }
 
 define i32 @test5(i32 %x) {
+; CHECK: test5
+; CHECK: uxtb16.w  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test6(i32 %x) {
+; CHECK: test6
+; CHECK: uxtb16.w  r0, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -40,6 +51,8 @@ define i32 @test6(i32 %x) {
 }
 
 define i32 @test7(i32 %x) {
+; CHECK: test7
+; CHECK: uxtb16.w  r0, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -49,6 +62,8 @@ define i32 @test7(i32 %x) {
 }
 
 define i32 @test8(i32 %x) {
+; CHECK: test8
+; CHECK: uxtb16.w  r0, r0, ror #24
 	%tmp1 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711680		; <i32> [#uses=1]
 	%tmp5 = lshr i32 %x, 24		; <i32> [#uses=1]
@@ -57,6 +72,8 @@ define i32 @test8(i32 %x) {
 }
 
 define i32 @test9(i32 %x) {
+; CHECK: test9
+; CHECK: uxtb16.w  r0, r0, ror #24
 	%tmp1 = lshr i32 %x, 24		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp5 = and i32 %tmp4, 16711680		; <i32> [#uses=1]
@@ -65,6 +82,13 @@ define i32 @test9(i32 %x) {
 }
 
 define i32 @test10(i32 %p0) {
+; CHECK: test10
+; CHECK: mov.w r1, #16253176
+; CHECK: and.w r0, r1, r0, lsr #7
+; CHECK: lsrs  r1, r0, #5
+; CHECK: uxtb16.w  r1, r1
+; CHECK: orr.w r0, r1, r0
+
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16253176		; <i32> [#uses=2]
 	%tmp4 = lshr i32 %tmp2, 5		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
index c106f57..3f67097 100644
--- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -11,7 +11,7 @@ target triple = "i686-apple-darwin8.6.1"
 
 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
 
-declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
 
 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
 
@@ -33,8 +33,8 @@ cond_false183:		; preds = %cond_false, %entry
 	%tmp337 = bitcast <4 x i32> %tmp336 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 )		; <<4 x float>> [#uses=1]
 	%tmp379 = bitcast <4 x float> %tmp378 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp388 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 )		; <<4 x i32>> [#uses=1]
-	%tmp392 = bitcast <4 x i32> %tmp388 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 )		; <<4 x i32>> [#uses=1]
+	%tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp399 = extractelement <8 x i16> %tmp392, i32 7		; <i16> [#uses=1]
 	%tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7		; <<8 x i16>> [#uses=1]
 	%tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8>		; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index 49f3a95..b045329 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -17,8 +17,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
 	%tmp75 = bitcast <4 x float> %tmp74 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 )		; <<4 x float>> [#uses=1]
 	%tmp89 = bitcast <4 x float> %tmp88 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp98 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 )		; <<4 x i32>> [#uses=1]
-	%tmp102 = bitcast <4 x i32> %tmp98 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 )		; <<4 x i32>> [#uses=1]
+	%tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float>		; <<4 x float>> [#uses=1]
@@ -32,8 +32,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
 	%tmp134 = bitcast <4 x float> %tmp133 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 )		; <<4 x float>> [#uses=1]
 	%tmp148 = bitcast <4 x float> %tmp147 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp159 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 )		; <<4 x i32>> [#uses=1]
-	%tmp163 = bitcast <4 x i32> %tmp159 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 )		; <<4 x i32>> [#uses=1]
+	%tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float>		; <<4 x float>> [#uses=1]
@@ -47,8 +47,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
 	%tmp195 = bitcast <4 x float> %tmp194 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 )		; <<4 x float>> [#uses=1]
 	%tmp209 = bitcast <4 x float> %tmp208 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp220 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 )		; <<4 x i32>> [#uses=1]
-	%tmp224 = bitcast <4 x i32> %tmp220 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 )		; <<4 x i32>> [#uses=1]
+	%tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float>		; <<4 x float>> [#uses=1]
@@ -62,8 +62,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
 	%tmp256 = bitcast <4 x float> %tmp255 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 )		; <<4 x float>> [#uses=1]
 	%tmp270 = bitcast <4 x float> %tmp269 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp281 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 )		; <<4 x i32>> [#uses=1]
-	%tmp285 = bitcast <4 x i32> %tmp281 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 )		; <<4 x i32>> [#uses=1]
+	%tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
 	%tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float>		; <<4 x float>> [#uses=1]
@@ -73,4 +73,4 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
 
 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
 
-declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index 989dfc5..b27ef83 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
 
-declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
 
 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>)
 
@@ -13,8 +13,8 @@ define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32
 	%tmp805 = add <4 x i32> %tmp777, zeroinitializer
 	%tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16>
 	%tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > )
-	%tmp1020 = tail call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 )
-	%tmp1030 = bitcast <8 x i16> %tmp1020 to <4 x i32>
+	%tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 )
+	%tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32>
 	%tmp1033 = add <4 x i32> zeroinitializer, %tmp1030
 	%tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64>
 	%tmp1049 = or <2 x i64> %tmp1048, zeroinitializer
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 83eb61a..2aea9c5 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; PR5534
 
 	%struct.CGPoint = type { double, double }
 	%struct.NSArray = type { %struct.NSObject }
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
new file mode 100644
index 0000000..8c62f4d
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Spill"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Folded Spill"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Reload"
+
+	%struct..0anon = type { i32 }
+	%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
+	%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+@rtx_format = external global [116 x i8*]		; <[116 x i8*]*> [#uses=1]
+@rtx_length = external global [117 x i32]		; <[117 x i32]*> [#uses=1]
+
+declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
+
+define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
+entry:
+	%tmp2 = icmp eq %struct.rtx_def* %x, null		; <i1> [#uses=1]
+	br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
+
+cond_next:		; preds = %entry
+	%tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0		; <i16*> [#uses=1]
+	%tmp7 = load i16* %tmp6		; <i16> [#uses=2]
+	%tmp78 = zext i16 %tmp7 to i32		; <i32> [#uses=2]
+	%tmp10 = icmp eq i16 %tmp7, 54		; <i1> [#uses=1]
+	br i1 %tmp10, label %cond_true13, label %cond_next32
+
+cond_true13:		; preds = %cond_next
+	%tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3		; <[1 x %struct..0anon]*> [#uses=1]
+	%tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp19 = load %struct.rtx_def** %tmp1718		; <%struct.rtx_def*> [#uses=1]
+	%tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0		; <i16*> [#uses=1]
+	%tmp21 = load i16* %tmp20		; <i16> [#uses=1]
+	%tmp22 = icmp eq i16 %tmp21, 57		; <i1> [#uses=1]
+	br i1 %tmp22, label %cond_true25, label %cond_next32
+
+cond_true25:		; preds = %cond_true13
+	%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 )		; <%struct.rtx_def*> [#uses=1]
+	ret %struct.rtx_def* %tmp29
+
+cond_next32:		; preds = %cond_true13, %cond_next
+	%tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78		; <i8**> [#uses=1]
+	%tmp35 = load i8** %tmp34, align 4		; <i8*> [#uses=1]
+	%tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78		; <i32*> [#uses=1]
+	%tmp38 = load i32* %tmp37, align 4		; <i32> [#uses=1]
+	%i.011 = add i32 %tmp38, -1		; <i32> [#uses=2]
+	%tmp12513 = icmp sgt i32 %i.011, -1		; <i1> [#uses=1]
+	br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %bb123, %cond_next32
+	%indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ]		; <i32> [#uses=2]
+	%i.01.0 = sub i32 %i.011, %indvar		; <i32> [#uses=5]
+	%tmp42 = getelementptr i8* %tmp35, i32 %i.01.0		; <i8*> [#uses=2]
+	%tmp43 = load i8* %tmp42		; <i8> [#uses=1]
+	switch i8 %tmp43, label %bb123 [
+		 i8 101, label %cond_true47
+		 i8 69, label %bb105.preheader
+	]
+
+cond_true47:		; preds = %bb
+	%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
+	%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp55 = load %struct.rtx_def** %tmp5354		; <%struct.rtx_def*> [#uses=1]
+	%tmp58 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn )		; <%struct.rtx_def*> [#uses=1]
+	%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0		; <i32*> [#uses=1]
+	%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32		; <i32> [#uses=1]
+	store i32 %tmp58.c, i32* %tmp62
+	%tmp6816 = load i8* %tmp42		; <i8> [#uses=1]
+	%tmp6917 = icmp eq i8 %tmp6816, 69		; <i1> [#uses=1]
+	br i1 %tmp6917, label %bb105.preheader, label %bb123
+
+bb105.preheader:		; preds = %cond_true47, %bb
+	%tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
+	%tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def**		; <%struct.rtvec_def**> [#uses=3]
+	%tmp11322 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp11524 = load i32* %tmp11423		; <i32> [#uses=1]
+	%tmp11625 = icmp eq i32 %tmp11524, 0		; <i1> [#uses=1]
+	br i1 %tmp11625, label %bb123, label %bb73
+
+bb73:		; preds = %bb73, %bb105.preheader
+	%j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ]		; <i32> [#uses=3]
+	%tmp81 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=2]
+	%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019		; <%struct..0anon*> [#uses=1]
+	%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp95 = load %struct.rtx_def** %tmp9394		; <%struct.rtx_def*> [#uses=1]
+	%tmp98 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn )		; <%struct.rtx_def*> [#uses=1]
+	%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0		; <i32*> [#uses=1]
+	%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32		; <i32> [#uses=1]
+	store i32 %tmp98.c, i32* %tmp101
+	%tmp104 = add i32 %j.019, 1		; <i32> [#uses=2]
+	%tmp113 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp115 = load i32* %tmp114		; <i32> [#uses=1]
+	%tmp116 = icmp ult i32 %tmp104, %tmp115		; <i1> [#uses=1]
+	br i1 %tmp116, label %bb73, label %bb123
+
+bb123:		; preds = %bb73, %bb105.preheader, %cond_true47, %bb
+	%i.0 = add i32 %i.01.0, -1		; <i32> [#uses=1]
+	%tmp125 = icmp sgt i32 %i.0, -1		; <i1> [#uses=1]
+	%indvar.next26 = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp125, label %bb, label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %bb123, %cond_next32, %entry
+	%UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ]		; <%struct.rtx_def*> [#uses=1]
+	ret %struct.rtx_def* %UnifiedRetVal
+}
diff --git a/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
new file mode 100644
index 0000000..5398eef
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; rdar://7394770
+
+%struct.JVTLib_100487 = type <{ i8 }>
+
+define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp {
+bb.nph:
+  %0 = shl i32 undef, 1                           ; <i32> [#uses=2]
+  %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb.i, label %bb1.i
+
+bb2:                                              ; preds = %bb
+  unreachable
+
+bb.i:                                             ; preds = %bb1
+  br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+bb1.i:                                            ; preds = %bb1
+  br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i
+  br i1 undef, label %bb5, label %bb
+
+bb5:                                              ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+  %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2]
+  %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1]
+  %mask241.masked = or i256 undef, undef          ; <i256> [#uses=1]
+  %ins237 = or i256 undef, 0                      ; <i256> [#uses=1]
+  br i1 undef, label %bb9, label %bb10
+
+bb9:                                              ; preds = %bb5
+  br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+bb12.i:                                           ; preds = %bb9
+  br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit:   ; preds = %bb12.i, %bb9
+  ret i32 undef
+
+bb10:                                             ; preds = %bb5
+  %1 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %2 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %3 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %4 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %5 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %6 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0]
+  %7 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1]
+  %tmp209 = trunc i256 %tmp208 to i16             ; <i16> [#uses=1]
+  %8 = sext i16 %tmp209 to i32                    ; <i32> [#uses=1]
+  %9 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %10 = sext i16 undef to i32                     ; <i32> [#uses=1]
+  %tmp193 = lshr i256 %mask241.masked, 208        ; <i256> [#uses=1]
+  %tmp194 = trunc i256 %tmp193 to i16             ; <i16> [#uses=1]
+  %11 = sext i16 %tmp194 to i32                   ; <i32> [#uses=1]
+  %tmp187 = lshr i256 %ins237, 240                ; <i256> [#uses=1]
+  %tmp188 = trunc i256 %tmp187 to i16             ; <i16> [#uses=1]
+  %12 = sext i16 %tmp188 to i32                   ; <i32> [#uses=1]
+  %13 = add nsw i32 %4, %1                        ; <i32> [#uses=1]
+  %14 = add nsw i32 %5, 0                         ; <i32> [#uses=1]
+  %15 = add nsw i32 %6, %2                        ; <i32> [#uses=1]
+  %16 = add nsw i32 %7, %3                        ; <i32> [#uses=1]
+  %17 = add nsw i32 0, %8                         ; <i32> [#uses=1]
+  %18 = add nsw i32 %11, %9                       ; <i32> [#uses=1]
+  %19 = add nsw i32 0, %10                        ; <i32> [#uses=1]
+  %20 = add nsw i32 %12, 0                        ; <i32> [#uses=1]
+  %21 = add nsw i32 %17, %13                      ; <i32> [#uses=2]
+  %22 = add nsw i32 %18, %14                      ; <i32> [#uses=2]
+  %23 = add nsw i32 %19, %15                      ; <i32> [#uses=2]
+  %24 = add nsw i32 %20, %16                      ; <i32> [#uses=2]
+  %25 = add nsw i32 %22, %21                      ; <i32> [#uses=2]
+  %26 = add nsw i32 %24, %23                      ; <i32> [#uses=2]
+  %27 = sub i32 %21, %22                          ; <i32> [#uses=1]
+  %28 = sub i32 %23, %24                          ; <i32> [#uses=1]
+  %29 = add nsw i32 %26, %25                      ; <i32> [#uses=1]
+  %30 = sub i32 %25, %26                          ; <i32> [#uses=1]
+  %31 = sub i32 %27, %28                          ; <i32> [#uses=1]
+  %32 = ashr i32 %29, 1                           ; <i32> [#uses=2]
+  %33 = ashr i32 %30, 1                           ; <i32> [#uses=2]
+  %34 = ashr i32 %31, 1                           ; <i32> [#uses=2]
+  %35 = icmp sgt i32 %32, 32767                   ; <i1> [#uses=1]
+  %o0_0.0.i = select i1 %35, i32 32767, i32 %32   ; <i32> [#uses=2]
+  %36 = icmp slt i32 %o0_0.0.i, -32768            ; <i1> [#uses=1]
+  %37 = icmp sgt i32 %33, 32767                   ; <i1> [#uses=1]
+  %o1_0.0.i = select i1 %37, i32 32767, i32 %33   ; <i32> [#uses=2]
+  %38 = icmp slt i32 %o1_0.0.i, -32768            ; <i1> [#uses=1]
+  %39 = icmp sgt i32 %34, 32767                   ; <i1> [#uses=1]
+  %o2_0.0.i = select i1 %39, i32 32767, i32 %34   ; <i32> [#uses=2]
+  %40 = icmp slt i32 %o2_0.0.i, -32768            ; <i1> [#uses=1]
+  %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1]
+  %41 = trunc i32 %o0_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp358 = select i1 %36, i16 -32768, i16 %41    ; <i16> [#uses=2]
+  %42 = trunc i32 %o1_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp347 = select i1 %38, i16 -32768, i16 %42    ; <i16> [#uses=1]
+  %43 = trunc i32 %o2_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp335 = select i1 %40, i16 -32768, i16 %43    ; <i16> [#uses=1]
+  %44 = icmp sgt i16 %tmp358, -1                  ; <i1> [#uses=2]
+  %..i24 = select i1 %44, i16 %tmp358, i16 undef  ; <i16> [#uses=1]
+  %45 = icmp sgt i16 %tmp347, -1                  ; <i1> [#uses=1]
+  %46 = icmp sgt i16 %tmp335, -1                  ; <i1> [#uses=1]
+  %47 = zext i16 %..i24 to i32                    ; <i32> [#uses=1]
+  %tmp = trunc i640 %tmp101 to i32                ; <i32> [#uses=1]
+  %48 = and i32 %tmp, 65535                       ; <i32> [#uses=2]
+  %49 = mul i32 %47, %48                          ; <i32> [#uses=1]
+  %50 = zext i16 undef to i32                     ; <i32> [#uses=1]
+  %51 = mul i32 %50, %48                          ; <i32> [#uses=1]
+  %52 = add i32 %49, %0                           ; <i32> [#uses=1]
+  %53 = add i32 %51, %0                           ; <i32> [#uses=1]
+  %54 = lshr i32 %52, undef                       ; <i32> [#uses=1]
+  %55 = lshr i32 %53, undef                       ; <i32> [#uses=1]
+  %56 = trunc i32 %54 to i16                      ; <i16> [#uses=1]
+  %57 = trunc i32 %55 to i16                      ; <i16> [#uses=1]
+  %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1]
+  %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1]
+  %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1]
+  br i1 undef, label %bb129.i, label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+bb129.i:                                          ; preds = %bb10
+  br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit:   ; preds = %bb129.i, %bb10
+  %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0]
+  %59 = phi i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0]
+  store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2
+  unreachable
+}
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
new file mode 100644
index 0000000..a7c2020
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7395200
+
+@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4]
+
+define void @foo(i32 %n, float* nocapture %x) nounwind ssp {
+entry:
+; CHECK: foo:
+  %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
+  br i1 %0, label %bb.nph, label %return
+
+bb.nph:                                           ; preds = %entry
+; CHECK: movq _g@GOTPCREL(%rip), %rcx
+  %tmp = zext i32 %n to i64                       ; <i64> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+; CHECK: LBB1_2:
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2]
+  %tmp9 = shl i64 %indvar, 2                      ; <i64> [#uses=4]
+  %tmp1016 = or i64 %tmp9, 1                      ; <i64> [#uses=1]
+  %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1]
+  %tmp1117 = or i64 %tmp9, 2                      ; <i64> [#uses=1]
+  %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1]
+  %tmp1318 = or i64 %tmp9, 3                      ; <i64> [#uses=1]
+  %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1]
+  %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1]
+  %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
+  store float %1, float* %x_addr.03, align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
+  store float %2, float* %scevgep, align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
+  store float %3, float* %scevgep12, align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
+  store float %4, float* %scevgep14, align 4
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=2]
+  %exitcond = icmp eq i64 %indvar.next, %tmp      ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
new file mode 100644
index 0000000..3ce9edb
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7396984
+
+@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
+
+define void @t(i32 %count) ssp nounwind {
+entry:
+; CHECK: t:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
+; CHECK: movups L_str(%rip), %xmm0
+  %tmp0 = alloca [60 x i8], align 1
+  %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
+  br label %bb1
+
+bb1:
+; CHECK: LBB1_1:
+; CHECK: movaps %xmm0, (%rsp)
+  %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
+  call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+  %tmp3 = add i32 %tmp2, 1
+  %tmp4 = icmp eq i32 %tmp3, %count
+  br i1 %tmp4, label %bb2, label %bb1
+
+bb2:
+  ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
new file mode 100644
index 0000000..5c1a2bc
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
@@ -0,0 +1,52 @@
+; RUN: llc -O3 < %s
+; This test fails with:
+; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" }
+%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+
+define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 {
+entry:
+  br i1 undef, label %bb56, label %bb27.outer
+
+bb8:                                              ; preds = %bb27.outer108, %bb13
+  switch i8 undef, label %bb27.outer [
+    i8 35, label %bb56
+    i8 37, label %bb14
+    i8 38, label %bb10
+    i8 42, label %bb56
+  ]
+
+bb27.outer:                                       ; preds = %bb8, %entry
+  %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2]
+  br label %bb27.outer108
+
+bb10:                                             ; preds = %bb8
+  %toBool = icmp eq i8 0, 0                       ; <i1> [#uses=1]
+  %or.cond = and i1 undef, %toBool                ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb13, label %bb56
+
+bb13:                                             ; preds = %bb10
+  br i1 undef, label %bb27.outer108, label %bb8
+
+bb14:                                             ; preds = %bb8
+  ret i8 1
+
+bb27.outer108:                                    ; preds = %bb13, %bb27.outer
+  %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1]
+  %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0]
+  br label %bb8
+
+bb56:                                             ; preds = %bb10, %bb8, %bb8, %entry
+  ret i8 1
+}
diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll
new file mode 100644
index 0000000..633995d
--- /dev/null
+++ b/test/CodeGen/X86/bigstructret.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: grep "movl	.24601, 12(%ecx)" %t
+; RUN: grep "movl	.48, 8(%ecx)" %t
+; RUN: grep "movl	.24, 4(%ecx)" %t
+; RUN: grep "movl	.12, (%ecx)" %t
+
+%0 = type { i32, i32, i32, i32 }
+
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+  %0 = insertvalue %0 zeroinitializer, i32 12, 0
+  %1 = insertvalue %0 %0, i32 24, 1
+  %2 = insertvalue %0 %1, i32 48, 2
+  %3 = insertvalue %0 %2, i32 24601, 3
+  ret %0 %3
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
index de89374..4878448 100644
--- a/test/CodeGen/X86/cmp0.ll
+++ b/test/CodeGen/X86/cmp0.ll
@@ -1,7 +1,24 @@
-; RUN: llc < %s -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
-define i64 @foo(i64 %x) {
+define i64 @test0(i64 %x) nounwind {
   %t = icmp eq i64 %x, 0
   %r = zext i1 %t to i64
   ret i64 %r
+; CHECK: test0:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	sete	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
 }
+
+define i64 @test1(i64 %x) nounwind {
+  %t = icmp slt i64 %x, 1
+  %r = zext i1 %t to i64
+  ret i64 %r
+; CHECK: test1:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	setle	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
+}
+
diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-5.ll
new file mode 100644
index 0000000..88fae37
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-5.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; <rdar://problem/7383328>
+
+@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
+
+define hidden void @func() nounwind ssp {
+entry:
+  %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+declare i32 @puts(i8*)
+
+define hidden i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32                            ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @func() nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval1 = load i32* %retval                    ; <i32> [#uses=1]
+  ret i32 %retval1
+}
+
+; CHECK: .private_extern _func.eh
+; CHECK: .private_extern _main.eh
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index c0379d1..ec5236b 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -74,16 +74,16 @@ exit:
 ; CHECK: yet_more_involved:
 ;      CHECK:   jmp .LBB3_1
 ; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: .LBB3_4:
 ; CHECK-NEXT:   call bar99
 ; CHECK-NEXT:   call get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jg .LBB3_5
+; CHECK-NEXT:   jg .LBB3_6
 ; CHECK-NEXT:   call block_a_true_func
-; CHECK-NEXT:   jmp .LBB3_6
-; CHECK-NEXT: .LBB3_5:
-; CHECK-NEXT:   call block_a_false_func
+; CHECK-NEXT:   jmp .LBB3_7
 ; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT:   call block_a_false_func
+; CHECK-NEXT: .LBB3_7:
 ; CHECK-NEXT:   call block_a_merge_func
 ; CHECK-NEXT: .LBB3_1:
 ; CHECK-NEXT:   call body
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index a1f38a7..9b53adb 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -4,7 +4,7 @@
 
 @flags2 = internal global [8193 x i8] zeroinitializer, align 32		; <[8193 x i8]*> [#uses=1]
 
-define void @test(i32 %k, i32 %i) {
+define void @test(i32 %k, i32 %i) nounwind {
 entry:
 	%k_addr.012 = shl i32 %i, 1		; <i32> [#uses=1]
 	%tmp14 = icmp sgt i32 %k_addr.012, 8192		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
index e340edd..c45a374 100644
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 | grep cmp | grep 240
 ; RUN: llc < %s -march=x86 | grep inc | count 1
 
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
+define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
 entry:
 	%tmp2955 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
 	br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index 4ec2a02..b07eeb6 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -3,7 +3,7 @@
 @X = weak global i16 0		; <i16*> [#uses=1]
 @Y = weak global i16 0		; <i16*> [#uses=1]
 
-define void @foo(i32 %N) {
+define void @foo(i32 %N) nounwind {
 entry:
 	%tmp1019 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
 	br i1 %tmp1019, label %bb, label %return
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
index 81da82e..bbafcf7c 100644
--- a/test/CodeGen/X86/loop-strength-reduce6.ll
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 | not grep inc
 
-define fastcc i32 @decodeMP3(i32 %isize, i32* %done) {
+define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind {
 entry:
 	br i1 false, label %cond_next191, label %cond_true189
 
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
new file mode 100644
index 0000000..3f90245
--- /dev/null
+++ b/test/CodeGen/X86/object-size.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; ModuleID = 'ts.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0"
+
+@p = common global i8* null, align 8              ; <i8**> [#uses=4]
+@.str = private constant [3 x i8] c"Hi\00"        ; <[3 x i8]*> [#uses=1]
+
+define void @bar() nounwind ssp {
+entry:
+  %tmp = load i8** @p                             ; <i8*> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i32 0) ; <i64> [#uses=1]
+  %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
+; X64: movq    $-1, %rax
+; X64: cmpq    $-1, %rax
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %tmp1 = load i8** @p                            ; <i8*> [#uses=1]
+  %tmp2 = load i8** @p                            ; <i8*> [#uses=1]
+  %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1]
+  %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %tmp3 = load i8** @p                            ; <i8*> [#uses=1]
+  %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0]
+  ret void
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly
+
+declare i8* @__strcpy_chk(i8*, i8*, i64) ssp
+
+define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp {
+entry:
+  %retval = alloca i8*                            ; <i8**> [#uses=2]
+  %__dest.addr = alloca i8*                       ; <i8**> [#uses=3]
+  %__src.addr = alloca i8*                        ; <i8**> [#uses=2]
+  store i8* %__dest, i8** %__dest.addr
+  store i8* %__src, i8** %__src.addr
+  %tmp = load i8** %__dest.addr                   ; <i8*> [#uses=1]
+  %tmp1 = load i8** %__src.addr                   ; <i8*> [#uses=1]
+  %tmp2 = load i8** %__dest.addr                  ; <i8*> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1]
+  %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
+  store i8* %call, i8** %retval
+  %1 = load i8** %retval                          ; <i8*> [#uses=1]
+  ret i8* %1
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 6319cb8..21c1a3c 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -145,7 +145,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
 	ret void
 ; X64: 	t9:
 ; X64: 		movsd	(%rsi), %xmm0
-; X64: 		movhps	%xmm0, (%rdi)
+; X64:	        movaps  (%rdi), %xmm1
+; X64:	        movlhps %xmm0, %xmm1
+; X64:	        movaps  %xmm1, (%rdi)
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 0f32a50..d762392 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
 ; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 6
-; RUN:   grep asm-printer %t   | grep 177
 
 	type { [62 x %struct.Bitvec*] }		; type %0
 	type { i8* }		; type %1
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
new file mode 100644
index 0000000..0d86e56
--- /dev/null
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -0,0 +1,408 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
+
+declare void @bar(i32)
+declare void @car(i32)
+declare void @dar(i32)
+declare void @ear(i32)
+declare void @far(i32)
+declare i1 @qux()
+
+@GHJK = global i32 0
+@HABC = global i32 0
+
+; BranchFolding should tail-merge the stores since they all precede
+; direct branches to the same place.
+
+; CHECK: tail_merge_me:
+; CHECK-NOT:  GHJK
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: movl $1, HABC(%rip)
+; CHECK-NOT:  GHJK
+
+define void @tail_merge_me() nounwind {
+entry:
+  %a = call i1 @qux()
+  br i1 %a, label %A, label %next
+next:
+  %b = call i1 @qux()
+  br i1 %b, label %B, label %C
+
+A:
+  call void @bar(i32 0)
+  store i32 0, i32* @GHJK
+  br label %M
+
+B:
+  call void @car(i32 1)
+  store i32 0, i32* @GHJK
+  br label %M
+
+C:
+  call void @dar(i32 2)
+  store i32 0, i32* @GHJK
+  br label %M
+
+M:
+  store i32 1, i32* @HABC
+  %c = call i1 @qux()
+  br i1 %c, label %return, label %altret
+
+return:
+  call void @ear(i32 1000)
+  ret void
+altret:
+  call void @far(i32 1001)
+  ret void
+}
+
+declare i8* @choose(i8*, i8*);
+
+; BranchFolding should tail-duplicate the indirect jump to avoid
+; redundant branching.
+
+; CHECK: tail_duplicate_me:
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+
+define void @tail_duplicate_me() nounwind {
+entry:
+  %a = call i1 @qux()
+  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
+                        i8* blockaddress(@tail_duplicate_me, %altret))
+  br i1 %a, label %A, label %next
+next:
+  %b = call i1 @qux()
+  br i1 %b, label %B, label %C
+
+A:
+  call void @bar(i32 0)
+  store i32 0, i32* @GHJK
+  br label %M
+
+B:
+  call void @car(i32 1)
+  store i32 0, i32* @GHJK
+  br label %M
+
+C:
+  call void @dar(i32 2)
+  store i32 0, i32* @GHJK
+  br label %M
+
+M:
+  indirectbr i8* %c, [label %return, label %altret]
+
+return:
+  call void @ear(i32 1000)
+  ret void
+altret:
+  call void @far(i32 1001)
+  ret void
+}
+
+; BranchFolding shouldn't try to merge the tails of two blocks
+; with only a branch in common, regardless of the fallthrough situation.
+
+; CHECK: dont_merge_oddly:
+; CHECK-NOT:   ret
+; CHECK:        ucomiss %xmm0, %xmm1
+; CHECK-NEXT:   jbe .LBB3_3
+; CHECK-NEXT:   ucomiss %xmm2, %xmm0
+; CHECK-NEXT:   ja .LBB3_4
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT:   movb $1, %al
+; CHECK-NEXT:   ret
+; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT:   ucomiss %xmm2, %xmm1
+; CHECK-NEXT:   jbe .LBB3_2
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT:   xorb %al, %al
+; CHECK-NEXT:   ret
+
+define i1 @dont_merge_oddly(float* %result) nounwind {
+entry:
+  %tmp4 = getelementptr float* %result, i32 2
+  %tmp5 = load float* %tmp4, align 4
+  %tmp7 = getelementptr float* %result, i32 4
+  %tmp8 = load float* %tmp7, align 4
+  %tmp10 = getelementptr float* %result, i32 6
+  %tmp11 = load float* %tmp10, align 4
+  %tmp12 = fcmp olt float %tmp8, %tmp11
+  br i1 %tmp12, label %bb, label %bb21
+
+bb:
+  %tmp23469 = fcmp olt float %tmp5, %tmp8
+  br i1 %tmp23469, label %bb26, label %bb30
+
+bb21:
+  %tmp23 = fcmp olt float %tmp5, %tmp11
+  br i1 %tmp23, label %bb26, label %bb30
+
+bb26:
+  ret i1 0
+
+bb30:
+  ret i1 1
+}
+
+; Do any-size tail-merging when two candidate blocks will both require
+; an unconditional jump to complete a two-way conditional branch.
+
+; CHECK: c_expand_expr_stmt:
+; CHECK:        jmp .LBB4_7
+; CHECK-NEXT: .LBB4_12:
+; CHECK-NEXT:   movq 8(%rax), %rax
+; CHECK-NEXT:   movb 16(%rax), %al
+; CHECK-NEXT:   cmpb $16, %al
+; CHECK-NEXT:   je .LBB4_6
+; CHECK-NEXT:   cmpb $23, %al
+; CHECK-NEXT:   je .LBB4_6
+; CHECK-NEXT:   jmp .LBB4_15
+; CHECK-NEXT: .LBB4_14:
+; CHECK-NEXT:   cmpb $23, %bl
+; CHECK-NEXT:   jne .LBB4_15
+; CHECK-NEXT: .LBB4_15:
+
+%0 = type { %struct.rtx_def* }
+%struct.lang_decl = type opaque
+%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
+%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
+%union..2anon = type { i32 }
+%union.rtunion = type { i8* }
+%union.tree_node = type { %struct.tree_decl }
+
+define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
+entry:
+  %tmp4 = load i8* null, align 8                  ; <i8> [#uses=3]
+  switch i8 %tmp4, label %bb3 [
+    i8 18, label %bb
+  ]
+
+bb:                                               ; preds = %entry
+  switch i32 undef, label %bb1 [
+    i32 0, label %bb2.i
+    i32 37, label %bb.i
+  ]
+
+bb.i:                                             ; preds = %bb
+  switch i32 undef, label %bb1 [
+    i32 0, label %lvalue_p.exit
+  ]
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3
+
+lvalue_p.exit:                                    ; preds = %bb.i
+  %tmp21 = load %union.tree_node** null, align 8  ; <%union.tree_node*> [#uses=3]
+  %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
+  %tmp23 = load i8* %tmp22, align 8               ; <i8> [#uses=1]
+  %tmp24 = zext i8 %tmp23 to i32                  ; <i32> [#uses=1]
+  switch i32 %tmp24, label %lvalue_p.exit4 [
+    i32 0, label %bb2.i3
+    i32 2, label %bb.i1
+  ]
+
+bb.i1:                                            ; preds = %lvalue_p.exit
+  %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
+  %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
+  %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp29 = load i8* %tmp28, align 8               ; <i8> [#uses=1]
+  %tmp30 = zext i8 %tmp29 to i32                  ; <i32> [#uses=1]
+  switch i32 %tmp30, label %lvalue_p.exit4 [
+    i32 0, label %bb2.i.i2
+    i32 2, label %bb.i.i
+  ]
+
+bb.i.i:                                           ; preds = %bb.i1
+  %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
+  %phitmp = icmp ne i32 %tmp34, 0                 ; <i1> [#uses=1]
+  br label %lvalue_p.exit4
+
+bb2.i.i2:                                         ; preds = %bb.i1
+  %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+  %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp39 = load i8* %tmp38, align 8               ; <i8> [#uses=1]
+  switch i8 %tmp39, label %bb2 [
+    i8 16, label %lvalue_p.exit4
+    i8 23, label %lvalue_p.exit4
+  ]
+
+bb2.i3:                                           ; preds = %lvalue_p.exit
+  %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+  %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp44 = load i8* %tmp43, align 8               ; <i8> [#uses=1]
+  switch i8 %tmp44, label %bb2 [
+    i8 16, label %lvalue_p.exit4
+    i8 23, label %lvalue_p.exit4
+  ]
+
+lvalue_p.exit4:                                   ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
+  %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
+  %tmp46 = icmp eq i8 %tmp4, 0                    ; <i1> [#uses=1]
+  %or.cond = or i1 %tmp45, %tmp46                 ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb2, label %bb3
+
+bb1:                                              ; preds = %bb2.i.i, %bb.i, %bb
+  %.old = icmp eq i8 %tmp4, 23                    ; <i1> [#uses=1]
+  br i1 %.old, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
+  %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
+  unreachable
+}
+
+declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly
+
+declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
+
+
+; If one tail merging candidate falls through into the other,
+; tail merging is likely profitable regardless of how few
+; instructions are involved. This function should have only
+; one ret instruction.
+
+; CHECK: foo:
+; CHECK:        call func
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT:   addq $8, %rsp
+; CHECK-NEXT:   ret
+
+define void @foo(i1* %V) nounwind {
+entry:
+  %t0 = icmp eq i1* %V, null
+  br i1 %t0, label %return, label %bb
+
+bb:
+  call void @func()
+  ret void
+
+return:
+  ret void
+}
+
+declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be merged, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
index a4f87c0..4923df2 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 4
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
 	ret i32 %a3
@@ -9,3 +9,24 @@ entry:
 	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )		; <i32> [#uses=1]
 	ret i32 %tmp11
 }
+
+declare fastcc i8* @alias_callee()
+
+define fastcc noalias i8* @noalias_caller() nounwind {
+  %p = tail call fastcc i8* @alias_callee()
+  ret i8* %p
+}
+
+declare fastcc noalias i8* @noalias_callee()
+
+define fastcc i8* @alias_caller() nounwind {
+  %p = tail call fastcc noalias i8* @noalias_callee()
+  ret i8* %p
+}
+
+declare fastcc i32 @i32_callee()
+
+define fastcc i32 @ret_undef() nounwind {
+  %p = tail call fastcc i32 @i32_callee()
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 556f103..f4930b0 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -18,4 +18,3 @@ entry:
         %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp4
 }
-
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
index e42b538..cdb030e 100644
--- a/test/CodeGen/X86/vec_zero-2.ll
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -12,8 +12,8 @@ bb4743:		; preds = %bb1664
 	%tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer		; <<8 x i16>> [#uses=1]
 	%tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16>		; <<8 x i16>> [#uses=1]
-	%tmp5266 = call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
-	%tmp5267 = bitcast <8 x i16> %tmp5266 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp5294 = and <2 x i64> zeroinitializer, %tmp5267		; <<2 x i64>> [#uses=1]
 	br label %bb5310
 bb5310:		; preds = %bb4743, %bb1664
@@ -21,4 +21,4 @@ bb5310:		; preds = %bb4743, %bb1664
 	ret i32 0
 }
 
-declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/test/CodeGen/XCore/bigstructret.ll b/test/CodeGen/XCore/bigstructret.ll
new file mode 100644
index 0000000..56af930
--- /dev/null
+++ b/test/CodeGen/XCore/bigstructret.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+%0 = type { i32, i32, i32, i32 }
+%1 = type { i32, i32, i32, i32, i32 }
+
+; Structs of 4 words can be returned in registers
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+  %0 = insertvalue %0 zeroinitializer, i32 12, 0
+  %1 = insertvalue %0 %0, i32 24, 1
+  %2 = insertvalue %0 %1, i32 48, 2
+  %3 = insertvalue %0 %2, i32 24601, 3
+  ret %0 %3
+}
+; CHECK: ReturnBigStruct:
+; CHECK: ldc r0, 12
+; CHECK: ldc r1, 24
+; CHECK: ldc r2, 48
+; CHECK: ldc r3, 24601
+; CHECK: retsp 0
+
+; Structs bigger than 4 words are returned via a hidden hidden sret-parameter
+define internal fastcc %1 @ReturnBigStruct2() nounwind readnone {
+entry:
+  %0 = insertvalue %1 zeroinitializer, i32 12, 0
+  %1 = insertvalue %1 %0, i32 24, 1
+  %2 = insertvalue %1 %1, i32 48, 2
+  %3 = insertvalue %1 %2, i32 24601, 3
+  %4 = insertvalue %1 %3, i32 4321, 4
+  ret %1 %4
+}
+; CHECK: ReturnBigStruct2:
+; CHECK: ldc r1, 4321
+; CHECK: stw r1, r0[4]
+; CHECK: ldc r1, 24601
+; CHECK: stw r1, r0[3]
+; CHECK: ldc r1, 48
+; CHECK: stw r1, r0[2]
+; CHECK: ldc r1, 24
+; CHECK: stw r1, r0[1]
+; CHECK: ldc r1, 12
+; CHECK: stw r1, r0[0]
+; CHECK: retsp 0