1497 files changed, 26466 insertions, 7178 deletions
diff --git a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
index 1c2d910..45f6088 100644
--- a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
+++ b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
@@ -2,17 +2,19 @@
 ; is performed.  It is not legal to delete the second load instruction because
 ; the value computed by the first load instruction is changed by the store.
 
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep DONOTREMOVE
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 define i32 @test() {
-	%A = alloca i32
-	store i32 0, i32* %A
-    %X = load i32* %A
-    %B = bitcast i32* %A to i8*
-    %C = getelementptr i8* %B, i64 1
-	store i8 1, i8* %C    ; Aliases %A
-    %Y.DONOTREMOVE = load i32* %A
-	%Z = sub i32 %X, %Y.DONOTREMOVE
-    ret i32 %Z
+; CHECK: %Y.DONOTREMOVE = load i32* %A
+; CHECK: %Z = sub i32 0, %Y.DONOTREMOVE
+  %A = alloca i32
+  store i32 0, i32* %A
+  %X = load i32* %A
+  %B = bitcast i32* %A to i8*
+  %C = getelementptr i8* %B, i64 1
+  store i8 1, i8* %C    ; Aliases %A
+  %Y.DONOTREMOVE = load i32* %A
+  %Z = sub i32 %X, %Y.DONOTREMOVE
+  ret i32 %Z
 }
 
diff --git a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
index 5d20077..78f74a0 100644
--- a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
+++ b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
@@ -1,15 +1,14 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep sub
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 ; BasicAA was incorrectly concluding that P1 and P2 didn't conflict!
 
 define i32 @test(i32 *%Ptr, i64 %V) {
-	%P2 = getelementptr i32* %Ptr, i64 1
-	%P1 = getelementptr i32* %Ptr, i64 %V
-	%X = load i32* %P1
-	store i32 5, i32* %P2
-
-	%Y = load i32* %P1
-
-	%Z = sub i32 %X, %Y
-	ret i32 %Z
+; CHECK: sub i32 %X, %Y
+  %P2 = getelementptr i32* %Ptr, i64 1
+  %P1 = getelementptr i32* %Ptr, i64 %V
+  %X = load i32* %P1
+  store i32 5, i32* %P2
+  %Y = load i32* %P1
+  %Z = sub i32 %X, %Y
+  ret i32 %Z
 }
diff --git a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
index 56e3339..fd4c239 100644
--- a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
+++ b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
@@ -1,6 +1,9 @@
 ; In this test, a local alloca cannot alias an incoming argument.
 
-; RUN: opt < %s -basicaa -gvn -instcombine -S | not grep sub
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
+
+; CHECK:      define i32 @test
+; CHECK-NEXT: ret i32 0
 
 define i32 @test(i32* %P) {
 	%X = alloca i32
diff --git a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
index 010a4588..768411e 100644
--- a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
+++ b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
@@ -1,11 +1,13 @@
 ; This testcase consists of alias relations which should be completely
 ; resolvable by basicaa.
 
-; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output \
-; RUN: |& not grep May:
+; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
 
 %T = type { i32, [10 x i8] }
 
+; CHECK:     Function: test
+; CHECK-NOT:   May:
+
 define void @test(%T* %P) {
   %A = getelementptr %T* %P, i64 0
   %B = getelementptr %T* %P, i64 0, i32 0
diff --git a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
index ce01db6..b7bbf77 100644
--- a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
+++ b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
@@ -1,13 +1,15 @@
 ; This testcase consists of alias relations which should be completely
 ; resolvable by basicaa, but require analysis of getelementptr constant exprs.
 
-; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output \
-; RUN: |& not grep May:
+; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
 
 %T = type { i32, [10 x i8] }
 
 @G = external global %T
 
+; CHECK:     Function: test
+; CHECK-NOT:   May:
+
 define void @test() {
   %D = getelementptr %T* @G, i64 0, i32 0
   %E = getelementptr %T* @G, i64 0, i32 1, i64 5
diff --git a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
index 56e4ed0..578aa594 100644
--- a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
+++ b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
@@ -1,10 +1,11 @@
-; RUN: opt < %s -basicaa -dse -S | grep {store i32 0}
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
 
 define void @test({i32,i32 }* %P) {
-	%Q = getelementptr {i32,i32}* %P, i32 1
-	%X = getelementptr {i32,i32}* %Q, i32 0, i32 1
-	%Y = getelementptr {i32,i32}* %Q, i32 1, i32 1
-	store i32 0, i32* %X
-	store i32 1, i32* %Y
-	ret void
+; CHECK: store i32 0, i32* %X
+  %Q = getelementptr {i32,i32}* %P, i32 1
+  %X = getelementptr {i32,i32}* %Q, i32 0, i32 1
+  %Y = getelementptr {i32,i32}* %Q, i32 1, i32 1
+  store i32 0, i32* %X
+  store i32 1, i32* %Y
+  ret void
 }
diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index 8320594..06a804c 100644
--- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -1,7 +1,9 @@
-; RUN: opt < %s -basicaa -aa-eval -disable-output |& grep {2 no alias respon}
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>&1 | FileCheck %s
 ; TEST that A[1][0] may alias A[0][i].
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
+; CHECK: 2 no alias responses
+
 define void @test(i32 %N) {
 entry:
 	%X = alloca [3 x [3 x i32]]		; <[3 x [3 x i32]]*> [#uses=4]
diff --git a/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll b/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
index 917bf25..46b6aaf 100644
--- a/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
+++ b/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
@@ -1,9 +1,5 @@
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 ; PR1109
-; RUN: opt < %s -basicaa -gvn -instcombine -S | \
-; RUN:   grep {sub i32}
-; RUN: opt < %s -basicaa -gvn -instcombine -S | \
-; RUN:   not grep {ret i32 0}
-; END.
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8"
@@ -20,6 +16,9 @@ target triple = "i686-apple-darwin8"
 	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
 	%struct.rec = type { %struct.head_type }
 
+; CHECK: define i32 @test
+; CHECK:   %Z = sub i32 %A, %Q
+; CHECK:   ret i32 %Z
 
 define i32 @test(%struct.closure_type* %tmp18169) {
 	%tmp18174 = getelementptr %struct.closure_type* %tmp18169, i32 0, i32 4, i32 0, i32 0		; <i32*> [#uses=2]
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
index e6a26e3..2a6f5b9 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %., i32\\* %.} | grep {%x} | grep {%y}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 
-declare i32* @unclear(i32* %a)
+; CHECK: Function: foo
+; CHECK:   MayAlias: i32* %x, i32* %y
 
 define void @foo(i32* noalias %x) {
   %y = call i32* @unclear(i32* %x)
@@ -8,3 +9,5 @@ define void @foo(i32* noalias %x) {
   store i32 0, i32* %y
   ret void
 }
+
+declare i32* @unclear(i32* %a)
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
index 7f33fa4..4be793e 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
@@ -1,6 +1,9 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {9 no alias}
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {6 may alias}
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %Ipointer, i32\\* %Jpointer}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; CHECK: Function: foo
+; CHECK:   MayAlias: i32* %Ipointer, i32* %Jpointer
+; CHECK: 9 no alias responses
+; CHECK: 6 may alias responses
 
 define void @foo(i32* noalias %p, i32* noalias %q, i32 %i, i32 %j) {
   %Ipointer = getelementptr i32* %p, i32 %i
diff --git a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index 5b81c17..ec0e2bd 100644
--- a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -1,11 +1,10 @@
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 ; PR1600
-; RUN: opt < %s -basicaa -gvn -instcombine -S | \
-; RUN:   grep {ret i32 0}
-; END.
 
 declare i16 @llvm.cttz.i16(i16, i1)
 
 define i32 @test(i32* %P, i16* %Q) {
+; CHECK: ret i32 0
         %A = load i16* %Q               ; <i16> [#uses=1]
         %x = load i32* %P               ; <i32> [#uses=1]
         %B = call i16 @llvm.cttz.i16( i16 %A, i1 true )          ; <i16> [#uses=1]
diff --git a/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll b/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
index 78f24b5..429160e 100644
--- a/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
+++ b/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -basicaa -gvn -dce -S | grep tmp7
+; RUN: opt < %s -basicaa -gvn -dce -S | FileCheck %s
 
         %struct.A = type { i32 }
         %struct.B = type { %struct.A }
 @a = global %struct.B zeroinitializer           ; <%struct.B*> [#uses=2]
 
 define i32 @_Z3fooP1A(%struct.A* %b) {
+; CHECK: %tmp7 = load
+; CHECK: ret i32 %tmp7
 entry:
         store i32 1, i32* getelementptr (%struct.B* @a, i32 0, i32 0, i32 0), align 8
         %tmp4 = getelementptr %struct.A* %b, i32 0, i32 0               ;<i32*> [#uses=1]
diff --git a/test/Analysis/BasicAA/2008-04-15-Byval.ll b/test/Analysis/BasicAA/2008-04-15-Byval.ll
index 2069401..428189a 100644
--- a/test/Analysis/BasicAA/2008-04-15-Byval.ll
+++ b/test/Analysis/BasicAA/2008-04-15-Byval.ll
@@ -1,10 +1,11 @@
-; RUN: opt < %s -std-compile-opts -S | grep store
+; RUN: opt < %s -std-compile-opts -S | FileCheck %s
 ; ModuleID = 'small2.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 	%struct.x = type { [4 x i32] }
 
 define void @foo(%struct.x* byval align 4  %X) nounwind  {
+; CHECK: store i32 2, i32* %tmp1
 entry:
 	%tmp = getelementptr %struct.x* %X, i32 0, i32 0		; <[4 x i32]*> [#uses=1]
 	%tmp1 = getelementptr [4 x i32]* %tmp, i32 0, i32 3		; <i32*> [#uses=1]
diff --git a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
index c9e553d..3db9a3f 100644
--- a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
+++ b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
@@ -1,7 +1,9 @@
-; RUN: opt < %s -basicaa -aa-eval |& grep {1 no alias response}
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>&1 | FileCheck %s
 
 declare noalias i32* @_Znwj(i32 %x) nounwind
 
+; CHECK: 1 no alias response
+
 define i32 @foo() {
   %A = call i32* @_Znwj(i32 4)
   %B = call i32* @_Znwj(i32 4)
diff --git a/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll b/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
index 3ab5d03..add7dee 100644
--- a/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
+++ b/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -basicaa -gvn -S | grep load
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 declare noalias i32* @noalias()
 
 define i32 @test(i32 %x) {
+; CHECK: load i32* %a
   %a = call i32* @noalias()
   store i32 1, i32* %a
   %b = getelementptr i32* %a, i32 %x
diff --git a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
index 17db2fd..c546d68 100644
--- a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
+++ b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
@@ -1,10 +1,13 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 ; If GEP base doesn't alias Z, then GEP doesn't alias Z.
 ; rdar://7282591
 
 @Y = common global i32 0
 @Z = common global i32 0
 
+; CHECK: Function: foo
+; CHECK:   NoAlias: i32* %P, i32* @Z
+
 define void @foo(i32 %cond) nounwind {
 entry:
   %a = alloca i32
diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
index 7b5584e..6656980 100644
--- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -1,8 +1,10 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {1 partial alias}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 ; PR7959
 
 target datalayout = "e-p:32:32:32"
 
+; CHECK: 1 partial alias response
+
 define i32 @test(i32* %tab, i32 %indvar) nounwind {
   %tmp31 = mul i32 %indvar, -2
   %tmp32 = add i32 %tmp31, 30
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
index c7b43ec..066f46b 100644
--- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll
+++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s |& FileCheck  %s
+; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck  %s
 
 declare void @callee(double* %callee_arg)
 declare void @nocap_callee(double* nocapture %nocap_callee_arg)
diff --git a/test/Analysis/BasicAA/byval.ll b/test/Analysis/BasicAA/byval.ll
index 2aba753..673fee0 100644
--- a/test/Analysis/BasicAA/byval.ll
+++ b/test/Analysis/BasicAA/byval.ll
@@ -1,17 +1,17 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {ret i32 1}
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
 	%struct.x = type { i32, i32, i32, i32 }
 @g = weak global i32 0		; <i32*> [#uses=1]
 
 define i32 @foo(%struct.x* byval  %a) nounwind  {
-entry:
-	%tmp1 = tail call i32 (...)* @bar( %struct.x* %a ) nounwind 		; <i32> [#uses=0]
-	%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0		; <i32*> [#uses=2]
-	store i32 1, i32* %tmp2, align 4
-	store i32 2, i32* @g, align 4
-	%tmp4 = load i32* %tmp2, align 4		; <i32> [#uses=1]
-	ret i32 %tmp4
+; CHECK: ret i32 1
+  %tmp1 = tail call i32 (...)* @bar( %struct.x* %a ) nounwind 		; <i32> [#uses=0]
+  %tmp2 = getelementptr %struct.x* %a, i32 0, i32 0		; <i32*> [#uses=2]
+  store i32 1, i32* %tmp2, align 4
+  store i32 2, i32* @g, align 4
+  %tmp4 = load i32* %tmp2, align 4		; <i32> [#uses=1]
+  ret i32 %tmp4
 }
 
 declare i32 @bar(...)
diff --git a/test/Analysis/BasicAA/cas.ll b/test/Analysis/BasicAA/cas.ll
index 754309c..d0cd9f4 100644
--- a/test/Analysis/BasicAA/cas.ll
+++ b/test/Analysis/BasicAA/cas.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 @flag0 = internal global i32 zeroinitializer
 @turn = internal global i32 zeroinitializer
 
+; CHECK: ret i32 0
 
 define i32 @main() {
   %a = load i32* @flag0
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 48ef259..232533c 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
 ; PR4267
 
 ; CHECK: MayAlias: double* %p.0.i.0, double* %p3
diff --git a/test/Analysis/BasicAA/dag.ll b/test/Analysis/BasicAA/dag.ll
index 501f4c3..1d2f6f1 100644
--- a/test/Analysis/BasicAA/dag.ll
+++ b/test/Analysis/BasicAA/dag.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
diff --git a/test/Analysis/BasicAA/empty.ll b/test/Analysis/BasicAA/empty.ll
index 7b06780..dfc79f9 100644
--- a/test/Analysis/BasicAA/empty.ll
+++ b/test/Analysis/BasicAA/empty.ll
@@ -1,8 +1,10 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output \
-; RUN:   |& grep {NoAlias:	\{\}\\* \[%\]p, \{\}\\* \[%\]q}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
+; CHECK:      Function: foo:
+; CHECK-NEXT:   NoAlias: {}* %p, {}* %q
+
 define void @foo({}* %p, {}* %q) {
   store {} {}, {}* %p
   store {} {}, {}* %q
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
index 4fa6375..2c34fd5 100644
--- a/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -tbaa -basicaa -gvn < %s | grep {ret i32 %}
-; RUN: opt -S -tbaa -gvn < %s | grep {ret i32 0}
+; RUN: opt -S -tbaa -basicaa -gvn < %s | FileCheck -check-prefix=BASICAA %s
+; RUN: opt -S -tbaa -gvn < %s | FileCheck %s
 ; rdar://8875631, rdar://8875069
 
 ; BasicAA should notice that the store stores to the entire %u object,
@@ -14,6 +14,8 @@ target datalayout = "e-p:64:64:64"
 @endianness_test = global i64 1, align 8
 
 define i32 @signbit(double %x) nounwind {
+; BASICAA: ret i32 %tmp5.lobit
+; CHECK:   ret i32 0
 entry:
   %u = alloca %union.anon, align 8
   %tmp9 = getelementptr inbounds %union.anon* %u, i64 0, i32 0
diff --git a/test/Analysis/BasicAA/gcsetest.ll b/test/Analysis/BasicAA/gcsetest.ll
index a903362..db557b7 100644
--- a/test/Analysis/BasicAA/gcsetest.ll
+++ b/test/Analysis/BasicAA/gcsetest.ll
@@ -2,12 +2,15 @@
 ; disambiguating some obvious cases.  All loads should be removable in 
 ; this testcase.
 
-; RUN: opt < %s -basicaa -gvn -instcombine -dce -S \
-; RUN: | not grep load
+; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s
 
 @A = global i32 7
 @B = global i32 8
 
+; CHECK:      define i32 @test()
+; CHECK-NEXT:   store i32 123, i32* @B
+; CHECK-NEXT:   ret i32 0
+
 define i32 @test() {
 	%A1 = load i32* @A
 
@@ -18,6 +21,14 @@ define i32 @test() {
 	ret i32 %X
 }
 
+; CHECK:      define i32 @test2()
+; CHECK-NEXT:   br label %Loop
+; CHECK:      Loop:
+; CHECK-NEXT:   store i32 0, i32* @B
+; CHECK-NEXT:   br i1 true, label %out, label %Loop
+; CHECK:      out:
+; CHECK-NEXT:   ret i32 0
+
 define i32 @test2() {
         %A1 = load i32* @A
         br label %Loop
@@ -36,6 +47,10 @@ out:
 
 declare void @external()
 
+; CHECK:      define i32 @test3()
+; CHECK-NEXT:   call void @external()
+; CHECK-NEXT:   ret i32 7
+
 define i32 @test3() {
 	%X = alloca i32
 	store i32 7, i32* %X
diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll
index 4bb2832..9c2c7ee 100644
--- a/test/Analysis/BasicAA/gep-alias.ll
+++ b/test/Analysis/BasicAA/gep-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S |& FileCheck %s
+; RUN: opt < %s -basicaa -gvn -instcombine -S 2>&1 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
index ebd349a..f0f1a63 100644
--- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 
 
 ; CHECK: Just Ref: call void @ro() <-> call void @f0()
diff --git a/test/Analysis/BasicAA/must-and-partial.ll b/test/Analysis/BasicAA/must-and-partial.ll
index 93b6184..58139ff 100644
--- a/test/Analysis/BasicAA/must-and-partial.ll
+++ b/test/Analysis/BasicAA/must-and-partial.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
 
 ; When merging MustAlias and PartialAlias, merge to PartialAlias
 ; instead of MayAlias.
diff --git a/test/Analysis/BasicAA/no-escape-call.ll b/test/Analysis/BasicAA/no-escape-call.ll
index ccabce9..b93db6e 100644
--- a/test/Analysis/BasicAA/no-escape-call.ll
+++ b/test/Analysis/BasicAA/no-escape-call.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i1 true}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 ; PR2436
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
 define i1 @foo(i32 %i) nounwind  {
+; CHECK: ret i1 true
 entry:
 	%arr = alloca [10 x i8*]		; <[10 x i8*]*> [#uses=1]
 	%tmp2 = call i8* @getPtr( ) nounwind 		; <i8*> [#uses=2]
diff --git a/test/Analysis/BasicAA/nocapture.ll b/test/Analysis/BasicAA/nocapture.ll
index 7970fbb..a8658ec 100644
--- a/test/Analysis/BasicAA/nocapture.ll
+++ b/test/Analysis/BasicAA/nocapture.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 declare i32* @test(i32* nocapture)
 
 define i32 @test2() {
+; CHECK: ret i32 0
        %P = alloca i32
        %Q = call i32* @test(i32* %P)
        %a = load i32* %P
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 50fd5cd..6aa26c1 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 ; rdar://7282591
 
 @X = common global i32 0
 @Y = common global i32 0
 @Z = common global i32 0
 
+; CHECK:  NoAlias: i32* %P, i32* @Z
+
 define void @foo(i32 %cond) nounwind {
 entry:
   %"alloca point" = bitcast i32 0 to i32
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
index 0ed4a2c..b8fee00 100644
--- a/test/Analysis/BasicAA/phi-and-select.ll
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -1,8 +1,17 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output \
-; RUN:   |& grep {NoAlias:	double\\* \[%\]a, double\\* \[%\]b\$} | count 4
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 
 ; BasicAA should detect NoAliases in PHIs and Selects.
 
+; CHECK: Function: foo
+; CHECK:  NoAlias: double* %a, double* %b
+; CHECK: Function: bar
+; CHECK:  NoAlias: double* %a, double* %b
+; CHECK: Function: qux
+; CHECK:  NoAlias: double* %a, double* %b
+; CHECK: Function: fin
+; CHECK:  NoAlias: double* %a, double* %b
+; CHECK: ===== Alias Analysis Evaluator Report =====
+
 ; Two PHIs in the same block.
 define void @foo(i1 %m, double* noalias %x, double* noalias %y) {
 entry:
diff --git a/test/Analysis/BasicAA/pure-const-dce.ll b/test/Analysis/BasicAA/pure-const-dce.ll
index 54e6e79..266e607 100644
--- a/test/Analysis/BasicAA/pure-const-dce.ll
+++ b/test/Analysis/BasicAA/pure-const-dce.ll
@@ -1,7 +1,25 @@
-; RUN: opt < %s -basicaa -gvn -S | grep TestConst | count 2
-; RUN: opt < %s -basicaa -gvn -S | grep TestPure  | count 3
-; RUN: opt < %s -basicaa -gvn -S | grep TestNone  | count 4
-@g = global i32 0		; <i32*> [#uses=1]
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+
+@g = global i32 0
+
+; CHECK:      @test
+; CHECK:      entry
+; CHECK:      %tmp0 = call i32 @TestConst(i32 5) readnone
+; CHECK-NEXT: %tmp1 = call i32 @TestPure(i32 6) readonly
+; CHECK-NEXT: %tmp2 = call i32 @TestNone(i32 7)
+; CHECK-NEXT: store i32 1, i32* @g
+; CHECK-NEXT: %tmp5 = call i32 @TestPure(i32 6) readonly
+; CHECK-NEXT: %tmp7 = call i32 @TestNone(i32 7)
+; CHECK-NEXT: %tmp8 = call i32 @TestNone(i32 7)
+; CHECK-NEXT: %sum0 = add i32 %tmp0, %tmp1
+; CHECK-NEXT: %sum1 = add i32 %sum0, %tmp2
+; CHECK-NEXT: %sum2 = add i32 %sum1, %tmp0
+; CHECK-NEXT: %sum3 = add i32 %sum2, %tmp0
+; CHECK-NEXT: %sum4 = add i32 %sum3, %tmp5
+; CHECK-NEXT: %sum5 = add i32 %sum4, %tmp5
+; CHECK-NEXT: %sum6 = add i32 %sum5, %tmp7
+; CHECK-NEXT: %sum7 = add i32 %sum6, %tmp8
+; CHECK-NEXT: ret i32 %sum7
 
 define i32 @test() {
 entry:
diff --git a/test/Analysis/BasicAA/tailcall-modref.ll b/test/Analysis/BasicAA/tailcall-modref.ll
index f7d6c57..ebeb28c 100644
--- a/test/Analysis/BasicAA/tailcall-modref.ll
+++ b/test/Analysis/BasicAA/tailcall-modref.ll
@@ -1,11 +1,7 @@
-; RUN: opt < %s -basicaa -gvn -instcombine |\
-; RUN:   llvm-dis | grep {ret i32 0}
-
-declare void @foo(i32*)
-
-declare void @bar()
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 define i32 @test() {
+; CHECK: ret i32 0
         %A = alloca i32         ; <i32*> [#uses=3]
         call void @foo( i32* %A )
         %X = load i32* %A               ; <i32> [#uses=1]
@@ -14,3 +10,7 @@ define i32 @test() {
         %Z = sub i32 %X, %Y             ; <i32> [#uses=1]
         ret i32 %Z
 }
+
+declare void @foo(i32*)
+
+declare void @bar()
diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
index 784d6c7..595cc42 100644
--- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
+++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
@@ -1,5 +1,9 @@
-; RUN: opt < %s -print-callgraph -disable-output |& \
-; RUN:   grep {calls function 'callee'} | count 2
+; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s
+
+; CHECK: Call graph node <<null function>>
+; CHECK:  CS<{{.*}}> calls function 'callee'
+; CHECK: Call graph node for function: 'caller'
+; CHECK:  CS<{{.*}}> calls function 'callee'
 
 define internal void @callee(...) {
 entry:
diff --git a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
index 0c5ef92..ac95188 100644
--- a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
+++ b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
@@ -1,7 +1,9 @@
-; RUN: opt < %s -print-callgraph -disable-output |& grep {calls function}
+; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s
 
 @a = global void ()* @f		; <void ()**> [#uses=0]
 
+; CHECK: calls function 'f'
+
 define internal void @f() {
 	unreachable
 }
diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll
index 272a559..450dce5 100644
--- a/test/Analysis/CallGraph/no-intrinsics.ll
+++ b/test/Analysis/CallGraph/no-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -print-callgraph -disable-output |& FileCheck %s
+; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s
 
 ; Check that intrinsics aren't added to the call graph
 
diff --git a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
index e31f416..45efc42 100644
--- a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -domtree -break-crit-edges -analyze \
-; RUN:  -domtree | grep {3.*%brtrue }
+; RUN: opt < %s -domtree -break-crit-edges -analyze -domtree | FileCheck %s
 ; PR932
 
+; CHECK: [3] %brtrue {1,2}
+
 declare void @use1(i32)
 
 define void @f(i32 %i, i1 %c) {
diff --git a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
index 17ace8a..d51c159 100644
--- a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
+++ b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -S | grep call | count 2
+; RUN: opt < %s -globalsmodref-aa -gvn -S | FileCheck %s
 
 @g = internal global i32 0		; <i32*> [#uses=2]
 
@@ -8,6 +8,8 @@ define i32 @r() {
 }
 
 define i32 @f() {
+; CHECK: call i32 @e()
+; CHECK: call i32 @e()
 entry:
 	%tmp = call i32 @e( )		; <i32> [#uses=1]
 	store i32 %tmp, i32* @g
diff --git a/test/Analysis/GlobalsModRef/aliastest.ll b/test/Analysis/GlobalsModRef/aliastest.ll
index 75af4dc..4cfed71 100644
--- a/test/Analysis/GlobalsModRef/aliastest.ll
+++ b/test/Analysis/GlobalsModRef/aliastest.ll
@@ -1,7 +1,12 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+
 @X = internal global i32 4		; <i32*> [#uses=1]
 
 define i32 @test(i32* %P) {
+; CHECK:      @test
+; CHECK-NEXT: store i32 7, i32* %P
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: ret i32 7
 	store i32 7, i32* %P
 	store i32 12, i32* @X
 	%V = load i32* %P		; <i32> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/chaining-analysis.ll b/test/Analysis/GlobalsModRef/chaining-analysis.ll
index 431b2a6..aeb76e4 100644
--- a/test/Analysis/GlobalsModRef/chaining-analysis.ll
+++ b/test/Analysis/GlobalsModRef/chaining-analysis.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
 
 ; This test requires the use of previous analyses to determine that
 ; doesnotmodX does not modify X (because 'sin' doesn't).
@@ -8,6 +8,10 @@
 declare double @sin(double) readnone
 
 define i32 @test(i32* %P) {
+; CHECK:      @test
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: call double @doesnotmodX(double 1.000000e+00)
+; CHECK-NEXT: ret i32 12
 	store i32 12, i32* @X
 	call double @doesnotmodX( double 1.000000e+00 )		; <double>:1 [#uses=0]
 	%V = load i32* @X		; <i32> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/indirect-global.ll b/test/Analysis/GlobalsModRef/indirect-global.ll
index 826f55c..48ac6dd 100644
--- a/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -1,9 +1,7 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -instcombine -S | \
-; RUN:   grep {ret i32 0}
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -instcombine -S | FileCheck %s
 
 @G = internal global i32* null		; <i32**> [#uses=3]
 
-
 declare i8* @malloc(i32)
 define void @test() {
 	%a = call i8* @malloc(i32 4)
@@ -13,6 +11,7 @@ define void @test() {
 }
 
 define i32 @test1(i32* %P) {
+; CHECK: ret i32 0
 	%g1 = load i32** @G		; <i32*> [#uses=2]
 	%h1 = load i32* %g1		; <i32> [#uses=1]
 	store i32 123, i32* %P
diff --git a/test/Analysis/GlobalsModRef/modreftest.ll b/test/Analysis/GlobalsModRef/modreftest.ll
index 3a02a94a..3eed916 100644
--- a/test/Analysis/GlobalsModRef/modreftest.ll
+++ b/test/Analysis/GlobalsModRef/modreftest.ll
@@ -1,7 +1,12 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+
 @X = internal global i32 4		; <i32*> [#uses=2]
 
 define i32 @test(i32* %P) {
+; CHECK:      @test
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: call void @doesnotmodX()
+; CHECK-NEXT: ret i32 12
 	store i32 12, i32* @X
 	call void @doesnotmodX( )
 	%V = load i32* @X		; <i32> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/purecse.ll b/test/Analysis/GlobalsModRef/purecse.ll
index 994aff8..e030417 100644
--- a/test/Analysis/GlobalsModRef/purecse.ll
+++ b/test/Analysis/GlobalsModRef/purecse.ll
@@ -1,6 +1,5 @@
 ; Test that pure functions are cse'd away
-; RUN: opt < %s -globalsmodref-aa -gvn -instcombine | \
-; RUN: llvm-dis | not grep sub
+; RUN: opt < %s -globalsmodref-aa -gvn -instcombine -S | FileCheck %s
 
 define i32 @pure(i32 %X) {
         %Y = add i32 %X, 1              ; <i32> [#uses=1]
@@ -8,6 +7,8 @@ define i32 @pure(i32 %X) {
 }
 
 define i32 @test1(i32 %X) {
+; CHECK:      %A = call i32 @pure(i32 %X)
+; CHECK-NEXT: ret i32 0
         %A = call i32 @pure( i32 %X )           ; <i32> [#uses=1]
         %B = call i32 @pure( i32 %X )           ; <i32> [#uses=1]
         %C = sub i32 %A, %B             ; <i32> [#uses=1]
@@ -15,6 +16,9 @@ define i32 @test1(i32 %X) {
 }
 
 define i32 @test2(i32 %X, i32* %P) {
+; CHECK:      %A = call i32 @pure(i32 %X)
+; CHECK-NEXT: store i32 %X, i32* %P
+; CHECK-NEXT: ret i32 0
         %A = call i32 @pure( i32 %X )           ; <i32> [#uses=1]
         store i32 %X, i32* %P ;; Does not invalidate 'pure' call.
         %B = call i32 @pure( i32 %X )           ; <i32> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll
new file mode 100644
index 0000000..49bce67
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.anon = type { i32, i32, i32 }
+@b = global %struct.anon { i32 1, i32 0, i32 0 }, align 4
+@c = common global i32 0, align 4
+@a = common global %struct.anon zeroinitializer, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+
+; Make sure that the initial memcpy call does not go away
+; because the volatile load is in the way. PR12899
+
+; CHECK: main_entry:
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64
+
+define i32 @main() nounwind uwtable ssp {
+main_entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false)
+  %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4, !tbaa !0
+  store i32 %0, i32* @c, align 4, !tbaa !0
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
index 9355aee..7119007 100644
--- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
+++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
@@ -2,7 +2,7 @@
 ; not a child of the loopentry.6 loop.
 ;
 ; RUN: opt < %s -analyze -loops | \
-; RUN:   grep {^            Loop at depth 4 containing: %loopentry.7<header><latch><exiting>}
+; RUN:   grep "^            Loop at depth 4 containing: %loopentry.7<header><latch><exiting>"
 
 define void @getAndMoveToFrontDecode() {
 	br label %endif.2
diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll
index faec45a..ac77ab3 100644
--- a/test/Analysis/RegionInfo/block_sort.ll
+++ b/test/Analysis/RegionInfo/block_sort.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats -analyze < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @BZ2_blockSort() nounwind {
 start:
diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll
index 2ce57c3..1145ffd 100644
--- a/test/Analysis/RegionInfo/cond_loop.ll
+++ b/test/Analysis/RegionInfo/cond_loop.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 5:
diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll
index 7ca5c7c..6b39880 100644
--- a/test/Analysis/RegionInfo/condition_complicated.ll
+++ b/test/Analysis/RegionInfo/condition_complicated.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 end165:
diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll
index 5fa940a..f551108 100644
--- a/test/Analysis/RegionInfo/condition_complicated_2.ll
+++ b/test/Analysis/RegionInfo/condition_complicated_2.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc void @compress() nounwind {
 end33:
diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll
index 098c9b6..5e4d9d2 100644
--- a/test/Analysis/RegionInfo/condition_forward_edge.ll
+++ b/test/Analysis/RegionInfo/condition_forward_edge.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll
index 1b88596..e48413a 100644
--- a/test/Analysis/RegionInfo/condition_same_exit.ll
+++ b/test/Analysis/RegionInfo/condition_same_exit.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll
index 19b154b..00d9ed2 100644
--- a/test/Analysis/RegionInfo/condition_simple.ll
+++ b/test/Analysis/RegionInfo/condition_simple.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll
index 3b152d2..b84abec 100644
--- a/test/Analysis/RegionInfo/exit_in_condition.ll
+++ b/test/Analysis/RegionInfo/exit_in_condition.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 entry:
diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll
index 59cead4..8e58828 100644
--- a/test/Analysis/RegionInfo/infinite_loop.ll
+++ b/test/Analysis/RegionInfo/infinite_loop.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -regions -analyze < %s 
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll
index 80c69b7a..a8227e3 100644
--- a/test/Analysis/RegionInfo/infinite_loop_2.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_2.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s 
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll
index 74ceafb..b09c9c1 100644
--- a/test/Analysis/RegionInfo/infinite_loop_3.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_3.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -regions -analyze < %s 
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll
index fd56af1..681c305 100644
--- a/test/Analysis/RegionInfo/infinite_loop_4.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_4.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s 
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll
index d1d6898..08d2ba8 100644
--- a/test/Analysis/RegionInfo/loop_with_condition.ll
+++ b/test/Analysis/RegionInfo/loop_with_condition.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll
index d4bf3cc..6449949 100644
--- a/test/Analysis/RegionInfo/loops_1.ll
+++ b/test/Analysis/RegionInfo/loops_1.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @loops_1() nounwind {
 entry:
diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll
index 07aa7c3..dc4a1ad 100644
--- a/test/Analysis/RegionInfo/loops_2.ll
+++ b/test/Analysis/RegionInfo/loops_2.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @meread_() nounwind {
 entry:
diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll
index 829c157..1474e03 100644
--- a/test/Analysis/RegionInfo/mix_1.ll
+++ b/test/Analysis/RegionInfo/mix_1.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @a_linear_impl_fig_1() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll
index 7bc0e46..8de6472 100644
--- a/test/Analysis/RegionInfo/multiple_exiting_edge.ll
+++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll
@@ -1,5 +1,5 @@
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition_0() nounwind {
 bb38:                                             ; preds = %bb34, %bb34, %bb37
diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll
index 9d8c455..a3707a1 100644
--- a/test/Analysis/RegionInfo/nested_loops.ll
+++ b/test/Analysis/RegionInfo/nested_loops.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 entry:
diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll
index 377a84d..890b4f2 100644
--- a/test/Analysis/RegionInfo/next.ll
+++ b/test/Analysis/RegionInfo/next.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @MAIN__() nounwind {
 entry:
diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll
index 00b544b..96c87e0 100644
--- a/test/Analysis/RegionInfo/paper.ll
+++ b/test/Analysis/RegionInfo/paper.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @a_linear_impl_fig_1() nounwind {
 0:
diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll
index a97182b..e75661e 100644
--- a/test/Analysis/RegionInfo/two_loops_same_header.ll
+++ b/test/Analysis/RegionInfo/two_loops_same_header.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -regions -analyze < %s | FileCheck %s
-; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb  -analyze < %s |& FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn  -analyze < %s |& FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb  -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn  -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
index 7ff130f..e0c5583 100644
--- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep {Loop %bb: backedge-taken count is 100}
+; RUN:   -scalar-evolution-max-iterations=0 | grep "Loop %bb: backedge-taken count is 100"
 ; PR1533
 
 @array = weak global [101 x i32] zeroinitializer, align 32		; <[100 x i32]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
index ab96243..036abf5 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
+; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb: backedge-taken count is (-1 + (-1 \* %x) + %y)"
 ; PR1597
 
 define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
index b678fee..a3192b9 100644
--- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
+++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13}
+; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 13"
 ; PR1706
 
 define i32 @f() {
diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
index fe3a7f4..d0644f7 100644
--- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %header: backedge-taken count is (0 smax %n)}
+; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %header: backedge-taken count is (0 smax %n)"
 
 define void @foo(i32 %n) {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
index bcc124d..41734d7 100644
--- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
+++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61}
+; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 61"
 ; PR2364
 
 define i32 @func_6() nounwind  {
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
index 9db9b71..5cf17a2 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution |& not grep smax
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax
 ; PR2261
 
 @lut = common global [256 x i8] zeroinitializer, align 32		; <[256 x i8]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
index 1847665..195dfaa 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution |& not grep smax
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax
 ; PR2070
 
 define i32 @a(i32 %x) nounwind  {
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
index 86e07ec4..cbf200e 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113}
+; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 113"
 ; PR2088
 
 define void @fun() {
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index 335bbaf..c25e4a3 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -analyze -scalar-evolution |& \
-; RUN: grep {Loop %bb: backedge-taken count is (7 + (-1 \\* %argc))}
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \
+; RUN: grep "Loop %bb: backedge-taken count is (7 + (-1 \* %argc))"
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
index db527fe..56a8343 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep {Loop %bb: Unpredictable backedge-taken count\\.}
+; RUN:  | grep "Loop %bb: Unpredictable backedge-taken count\."
 
 ; ScalarEvolution can't compute a trip count because it doesn't know if
 ; dividing by the stride will have a remainder. This could theoretically
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
index fa9f21a..aaf6770 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 3}
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 3"
 ; XFAIL: *
 
 ; This is a tricky testcase for unsigned wrap detection which ScalarEvolution
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index 25a0434..a1b3b71 100644
--- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep {backedge-taken count is 255}
+; RUN: opt < %s -analyze -scalar-evolution | grep "backedge-taken count is 255"
 
 define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
 bb1.thread:
diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
index 8152e98..bb14919 100644
--- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -analyze -scalar-evolution |& \
-; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)}
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \
+; RUN: grep "(((-1 * %i0) + (100005 smax %i0)) /u 5)"
 ; XFAIL: *
 
 define i32 @foo0(i32 %i0) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
index 3eaa492..7000626 100644
--- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 5}
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 5"
 ; XFAIL: *
 
 define i8 @foo0(i8 %i0) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
index cc2a2e4..82f2608 100644
--- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution | not grep {/u -1}
+; RUN: opt < %s -analyze -scalar-evolution | not grep "/u -1"
 ; PR3275
 
 @g_16 = external global i16		; <i16*> [#uses=3]
diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
index c2e108a..ebd9f73 100644
--- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
+++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep {(trunc i} | not grep ext
+; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext
 
 define i16 @test1(i8 %x) {
   %A = sext i8 %x to i32
diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
index dc7bd29..8a78043 100644
--- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
+++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep {count is 2}
+; RUN: opt < %s -analyze -scalar-evolution | grep "count is 2"
 ; PR3171
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
diff --git a/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll
new file mode 100644
index 0000000..52e6683
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -iv-users -S -disable-output
+;
+; PR12868: Infinite recursion:
+; getUDivExpr()->getZeroExtendExpr()->isLoopBackedgeGuardedBy()
+;
+; We actually want SCEV simplification to fail gracefully in this
+; case, so there's no output to check, just the absense of stack overflow.
+
+@c = common global i8 0, align 1
+
+define i32 @func() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i8 [ -1, %entry ], [ %inc, %for.body ]
+  %ui.0 = phi i32 [ undef, %entry ], [ %div, %for.body ]
+  %tobool = icmp eq i8 %storemerge, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %conv = sext i8 %storemerge to i32
+  %div = lshr i32 %conv, 1
+  %tobool2 = icmp eq i32 %div, 0
+  %inc = add i8 %storemerge, 1
+  br i1 %tobool2, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.cond
+  ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
new file mode 100644
index 0000000..eee4ec4
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -S -indvars -loop-unroll | FileCheck %s
+;
+; loop-unroll fully unrolls the inner loop, creating an interesting
+; chain of multiplication. indvars forces SCEV to run again on the
+; outer loop. While reducing the recurrence at %mul3, unsigned integer overflow
+; causes one of the terms to reach zero. This forces all multiples in
+; the recurrence to be zero, reducing the whole thing to a constant expression.
+;
+; PR12929: cast<Ty>() argument of incompatible type
+
+; CHECK: @func
+; CHECK: for.cond:
+; CHECK: %inc1 = phi i8 [ 0, %entry ], [ %0, %for.body ]
+; CHECK: br label %for.body
+
+; CHECK: for.body:
+; CHECK: %inc.9 = add i8 %inc.8, 1
+; CHECK: %0 = add i8 %inc1, 10
+; CHEKC: br label %for.cond
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+define void @func() noreturn nounwind uwtable ssp {
+entry:
+  br label %for.cond
+
+for.cond.loopexit:                                ; preds = %for.body
+  %mul.lcssa = phi i8 [ %mul, %for.body ]
+  %0 = add i8 %inc1, 10
+  %indvars.iv.next = add i8 %indvars.iv, 10
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.loopexit, %entry
+  %indvars.iv = phi i8 [ %indvars.iv.next, %for.cond.loopexit ], [ 10, %entry ]
+  %mul3 = phi i8 [ undef, %entry ], [ %mul.lcssa, %for.cond.loopexit ]
+  %inc1 = phi i8 [ 0, %entry ], [ %0, %for.cond.loopexit ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.cond
+  %inc26 = phi i8 [ %inc1, %for.cond ], [ %inc, %for.body ]
+  %mul45 = phi i8 [ %mul3, %for.cond ], [ %mul, %for.body ]
+  %inc = add i8 %inc26, 1
+  %mul = mul i8 %inc26, %mul45
+  %exitcond = icmp ne i8 %inc, %indvars.iv
+  br i1 %exitcond, label %for.body, label %for.cond.loopexit
+}
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index 06f1b6f..e946d7a 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -80,3 +80,24 @@ for.cond539.preheader:
   unreachable
 }
 ; CHECK: Determining loop execution counts for: @test3
+
+; PR13489
+; We used to crash on this too.
+
+define void @test4() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %v2.02 = phi i64 [ 2, %entry ], [ %phitmp, %for.body ]
+  %v1.01 = phi i64 [ -2, %entry ], [ %sub1, %for.body ]
+  %sub1 = sub i64 %v1.01, %v2.02
+  %phitmp = add i64 %v2.02, 2
+  %tobool = icmp eq i64 %sub1, %phitmp
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; CHECK: Determining loop execution counts for: @test4
diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll
index 1772573..06f4a85 100644
--- a/test/Analysis/ScalarEvolution/and-xor.ll
+++ b/test/Analysis/ScalarEvolution/and-xor.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:   | grep {\\-->  (zext} | count 2
+; RUN:   | grep "\-->  (zext" | count 2
 
 define i32 @foo(i32 %x) {
   %n = and i32 %x, 255
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index 24275f9..3d15c78 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb3: backedge-taken count is (-1 + %n)}
+; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb3: backedge-taken count is (-1 + %n)"
 
 ; We don't want to use a max in the trip count expression in
 ; this testcase.
diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll
index 4f6f1e2..2846797 100644
--- a/test/Analysis/ScalarEvolution/div-overflow.ll
+++ b/test/Analysis/ScalarEvolution/div-overflow.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep {\\-->  ((-128 \\* %a) /u -128)}
+; RUN:  | grep "\-->  ((-128 \* %a) /u -128)"
 
 ; Don't let ScalarEvolution fold this div away.
 
diff --git a/test/Analysis/ScalarEvolution/how-far-to-zero.ll b/test/Analysis/ScalarEvolution/how-far-to-zero.ll
new file mode 100644
index 0000000..07af88f
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/how-far-to-zero.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; PR13228
+define void @f() nounwind uwtable readnone {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %c.0 = phi i8 [ 1, %entry ], [ 0, %for.cond ]
+  %i.0 = phi i8 [ 0, %entry ], [ %inc, %for.cond ]
+  %lnot = icmp eq i8 %i.0, 0
+  %inc = add i8 %i.0, 1
+  br i1 %lnot, label %for.cond, label %while.cond
+
+while.cond:                                       ; preds = %while.body, %for.cond
+  %b.2 = phi i8 [ %add, %while.body ], [ 0, %for.cond ]
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  %add = add i8 %b.2, %c.0
+  %tobool7 = icmp eq i8 %add, 0
+  br i1 %tobool7, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.body, %while.cond
+  ret void
+}
+;CHECK: Loop %while.cond: <multiple exits> Unpredictable backedge-taken count.
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll
index dd5a66c..a0abbb7 100644
--- a/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
-; RUN:   |& FileCheck %s
+; RUN:   2>&1 | FileCheck %s
 
 ; At the time of this writing, -basicaa misses the example of the form
 ; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references,
diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll
index 23e1210..8b3d641 100644
--- a/test/Analysis/ScalarEvolution/sext-inreg.ll
+++ b/test/Analysis/ScalarEvolution/sext-inreg.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -analyze -scalar-evolution > %t
-; RUN: grep {sext i57 \{0,+,199\}<%bb> to i64} %t | count 1
-; RUN: grep {sext i59 \{0,+,199\}<%bb> to i64} %t | count 1
+; RUN: grep "sext i57 {0,+,199}<%bb> to i64" %t | count 1
+; RUN: grep "sext i59 {0,+,199}<%bb> to i64" %t | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index 9063cbb..c34596d 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep { -->  (sext i. \{.\*,+,.\*\}<%bb1> to i64)} | count 5
+; RUN:  | grep " -->  (sext i. {.*,+,.*}<%bb1> to i64)" | count 5
 
 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
 ; where the trip count is not within range.
diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll
index 15dd744..eceb429 100644
--- a/test/Analysis/ScalarEvolution/smax.ll
+++ b/test/Analysis/ScalarEvolution/smax.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -analyze -scalar-evolution | grep smax | count 2
 ; RUN: opt < %s -analyze -scalar-evolution | grep \
-; RUN:     {%. smax %. smax %.}
+; RUN:     "%. smax %. smax %."
 ; PR1614
 
 define i32 @x(i32 %a, i32 %b, i32 %c) {
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index cb4e267..94f6882 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000}
+; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 10000"
 ; PR1101
 
 @A = weak global [1000 x i32] zeroinitializer, align 32         
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index e26cbea..d84e99f 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution | \
-; RUN:   grep {backedge-taken count is 4}
+; RUN:   grep "backedge-taken count is 4"
 ; PR1101
 
 @A = weak global [1000 x i32] zeroinitializer, align 32         
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 1bf86ae..0cb6c95 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep {Loop %bb3\\.i: Unpredictable backedge-taken count\\.}
+; RUN:  | grep "Loop %bb3\.i: Unpredictable backedge-taken count\."
 
 ; ScalarEvolution can't compute a trip count because it doesn't know if
 ; dividing by the stride will have a remainder. This could theoretically
diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll
index 116f62d..c02ae14 100644
--- a/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   | grep {sext.*trunc.*Exits: 11}
+; RUN:   | grep "sext.*trunc.*Exits: 11"
 
 ; ScalarEvolution should be able to compute a loop exit value for %indvar.i8.
 
diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll
index 1194a1d..68a1ae1 100644
--- a/test/Analysis/ScalarEvolution/trip-count5.ll
+++ b/test/Analysis/ScalarEvolution/trip-count5.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -analyze -scalar-evolution > %t
 ; RUN: grep sext %t | count 2
-; RUN: not grep {(sext} %t
+; RUN: not grep "(sext" %t
 
 ; ScalarEvolution should be able to compute a maximum trip count
 ; value sufficient to fold away both sext casts.
diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll
index 956fb81..882f552 100644
--- a/test/Analysis/ScalarEvolution/trip-count6.ll
+++ b/test/Analysis/ScalarEvolution/trip-count6.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep {max backedge-taken count is 1\$}
+; RUN:  | grep "max backedge-taken count is 1$"
 
 @mode_table = global [4 x i32] zeroinitializer          ; <[4 x i32]*> [#uses=1]
 
diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll
index a8b797e..2bcb9e9 100644
--- a/test/Analysis/ScalarEvolution/trip-count7.ll
+++ b/test/Analysis/ScalarEvolution/trip-count7.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   | grep {Loop %bb7.i: Unpredictable backedge-taken count\\.}
+; RUN:   | grep "Loop %bb7.i: Unpredictable backedge-taken count\."
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll
index ac5ee60..005162b 100644
--- a/test/Analysis/ScalarEvolution/trip-count8.ll
+++ b/test/Analysis/ScalarEvolution/trip-count8.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep {Loop %for\\.body: backedge-taken count is (-1 + \[%\]ecx)}
+; RUN:  | grep "Loop %for\.body: backedge-taken count is (-1 + [%]ecx)"
 ; PR4599
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll
index c0530bb..4ab2f39 100644
--- a/test/Analysis/ScalarEvolution/xor-and.ll
+++ b/test/Analysis/ScalarEvolution/xor-and.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:   | grep {\\-->  (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)}
+; RUN:   | grep "\-->  (zext i4 (-8 + (trunc i64 (8 \* %x) to i4)) to i64)"
 
 ; ScalarEvolution shouldn't try to analyze %z into something like
 ;   -->  (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64)
diff --git a/test/Assembler/2003-04-15-ConstantInitAssertion.ll b/test/Assembler/2003-04-15-ConstantInitAssertion.ll
index fa6b807..dddbdb1 100644
--- a/test/Assembler/2003-04-15-ConstantInitAssertion.ll
+++ b/test/Assembler/2003-04-15-ConstantInitAssertion.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s >/dev/null |& grep {struct initializer doesn't match struct element type}
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: grep "struct initializer doesn't match struct element type" %t
 ; Test the case of a misformed constant initializer
 ; This should cause an assembler error, not an assertion failure!
 constant { i32 } { float 1.0 }
diff --git a/test/Assembler/2003-05-21-MalformedShiftCrash.ll b/test/Assembler/2003-05-21-MalformedShiftCrash.ll
index a845d89..1d4ac40 100644
--- a/test/Assembler/2003-05-21-MalformedShiftCrash.ll
+++ b/test/Assembler/2003-05-21-MalformedShiftCrash.ll
@@ -1,4 +1,5 @@
 ; Found by inspection of the code
-; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer operands}
+; RUN: not llvm-as < %s > /dev/null 2> %t
+; RUN: grep "constexpr requires integer operands" %t
 
 global i32 ashr (float 1.0, float 2.0)
diff --git a/test/Assembler/2003-05-21-MalformedStructCrash.ll b/test/Assembler/2003-05-21-MalformedStructCrash.ll
index 8d20e070..44d3e23 100644
--- a/test/Assembler/2003-05-21-MalformedStructCrash.ll
+++ b/test/Assembler/2003-05-21-MalformedStructCrash.ll
@@ -1,4 +1,5 @@
 ; Found by inspection of the code
-; RUN: not llvm-as < %s  > /dev/null |& grep {initializer with struct type has wrong # elements}
+; RUN: not llvm-as < %s  > /dev/null 2> %t
+; RUN: grep "initializer with struct type has wrong # elements" %t
 
 global {} { i32 7, float 1.0, i32 7, i32 8 }
diff --git a/test/Assembler/2003-06-17-InvokeDisassemble.ll b/test/Assembler/2003-06-17-InvokeDisassemble.ll
deleted file mode 100644
index 8a9670e..0000000
--- a/test/Assembler/2003-06-17-InvokeDisassemble.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis
-
-define void @test() {
-  invoke void @test( )
-    to label %Next unwind label %Next
-
-Next:           ; preds = %0, %0
-  %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-            cleanup
-  ret void
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/Assembler/2003-11-12-ConstantExprCast.ll b/test/Assembler/2003-11-12-ConstantExprCast.ll
index 149fef2..47a5353 100644
--- a/test/Assembler/2003-11-12-ConstantExprCast.ll
+++ b/test/Assembler/2003-11-12-ConstantExprCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | not grep { bitcast (}
+; RUN: llvm-as < %s | llvm-dis | not grep " bitcast ("
 
 @.Base64_1 = external constant [4 x i8]         ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/Assembler/2003-11-24-SymbolTableCrash.ll b/test/Assembler/2003-11-24-SymbolTableCrash.ll
index 041b0d9..28fd301 100644
--- a/test/Assembler/2003-11-24-SymbolTableCrash.ll
+++ b/test/Assembler/2003-11-24-SymbolTableCrash.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {multiple definition}
+; RUN: not llvm-as < %s 2>&1 | grep "multiple definition"
 
 define void @test() {
 	%tmp.1 = add i32 0, 1
diff --git a/test/Assembler/2004-01-11-getelementptrfolding.ll b/test/Assembler/2004-01-11-getelementptrfolding.ll
index c22aede..5249d0e 100644
--- a/test/Assembler/2004-01-11-getelementptrfolding.ll
+++ b/test/Assembler/2004-01-11-getelementptrfolding.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {getelementptr.*getelementptr}
+; RUN:   not grep "getelementptr.*getelementptr"
 
 %struct.TTriangleItem = type { i8*, i8*, [3 x %struct.TUVVertex] }
 %struct.TUVVertex = type { i16, i16, i16, i16 }
diff --git a/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll b/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll
index 775b755..9f24f1a 100644
--- a/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll
+++ b/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll
@@ -1,3 +1,3 @@
-; RUN: not llvm-as %s |& grep {found end of file when expecting more instructions}
+; RUN: not llvm-as %s 2>&1 | grep "found end of file when expecting more instructions"
 
 define void @foo() {
diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
index 40648fd..4db5b74 100644
--- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll
+++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
@@ -1,4 +1,4 @@
 ; Test for PR463.  This program is erroneous, but should not crash llvm-as.
-; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.none'}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'struct.none'"
 
 @.FOO  = internal global %struct.none zeroinitializer
diff --git a/test/Assembler/2006-09-28-CrashOnInvalid.ll b/test/Assembler/2006-09-28-CrashOnInvalid.ll
index a203c6a..6041bdf 100644
--- a/test/Assembler/2006-09-28-CrashOnInvalid.ll
+++ b/test/Assembler/2006-09-28-CrashOnInvalid.ll
@@ -1,6 +1,7 @@
 ; Test for PR902.  This program is erroneous, but should not crash llvm-as.
 ; This tests that a simple error is caught and processed correctly.
-; RUN: not llvm-as < %s >/dev/null |& grep {floating point constant invalid for type}
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: grep "floating point constant invalid for type" %t
 
 define void @test() {
   add i32 1, 2.0
diff --git a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
index a39de1c..184e543 100644
--- a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
+++ b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
@@ -1,5 +1,5 @@
 ; The assembler should catch an undefined argument type .
-; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'typedef.bc_struct'}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'typedef.bc_struct'"
 
 ; %typedef.bc_struct = type opaque
 
diff --git a/test/Assembler/2007-01-16-CrashOnBadCast.ll b/test/Assembler/2007-01-16-CrashOnBadCast.ll
index 81f5458..aa74144 100644
--- a/test/Assembler/2007-01-16-CrashOnBadCast.ll
+++ b/test/Assembler/2007-01-16-CrashOnBadCast.ll
@@ -1,5 +1,5 @@
 ; PR1117
-; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "invalid cast opcode for cast from"
 
 define i8* @nada(i64 %X) {
     %result = trunc i64 %X to i8*
diff --git a/test/Assembler/2007-01-16-CrashOnBadCast2.ll b/test/Assembler/2007-01-16-CrashOnBadCast2.ll
index c05c609..479bef7 100644
--- a/test/Assembler/2007-01-16-CrashOnBadCast2.ll
+++ b/test/Assembler/2007-01-16-CrashOnBadCast2.ll
@@ -1,4 +1,4 @@
 ; PR1117
-; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "invalid cast opcode for cast from"
 
 @X = constant i8* trunc (i64 0 to i8*)
diff --git a/test/Assembler/2007-03-18-InvalidNumberedVar.ll b/test/Assembler/2007-03-18-InvalidNumberedVar.ll
index b2193b1..0f6b24d 100644
--- a/test/Assembler/2007-03-18-InvalidNumberedVar.ll
+++ b/test/Assembler/2007-03-18-InvalidNumberedVar.ll
@@ -1,5 +1,6 @@
 ; PR 1258
-; RUN: not llvm-as < %s >/dev/null |& grep {'%0' defined with type 'i1'}
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: grep "'%0' defined with type 'i1'" %t
 
 define i32 @test1(i32 %a, i32 %b) {
 entry:
diff --git a/test/Assembler/2007-03-19-NegValue.ll b/test/Assembler/2007-03-19-NegValue.ll
index e90cf35..64eb3cb 100644
--- a/test/Assembler/2007-03-19-NegValue.ll
+++ b/test/Assembler/2007-03-19-NegValue.ll
@@ -1,5 +1,5 @@
 ; Test whether negative values > 64 bits retain their negativeness.
-; RUN: llvm-as < %s | llvm-dis | grep {add i65.*, -1}
+; RUN: llvm-as < %s | llvm-dis | grep "add i65.*, -1"
 
 define i65 @testConsts(i65 %N) {
   %a = add i65 %N, -1
diff --git a/test/Assembler/2007-04-20-AlignedLoad.ll b/test/Assembler/2007-04-20-AlignedLoad.ll
index f0217ae..98a5428 100644
--- a/test/Assembler/2007-04-20-AlignedLoad.ll
+++ b/test/Assembler/2007-04-20-AlignedLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | grep {align 1024}
+; RUN: llvm-as < %s | llvm-dis | grep "align 1024"
 
 define i32 @test(i32* %arg) {
 entry:
diff --git a/test/Assembler/2007-04-20-AlignedStore.ll b/test/Assembler/2007-04-20-AlignedStore.ll
index 1b08c48..9e4dd9f 100644
--- a/test/Assembler/2007-04-20-AlignedStore.ll
+++ b/test/Assembler/2007-04-20-AlignedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | grep {align 1024}
+; RUN: llvm-as < %s | llvm-dis | grep "align 1024"
 
 define void @test(i32* %arg) {
 entry:
diff --git a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
index c26d9eb..b0ca1aa 100644
--- a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
+++ b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | grep {icmp.*test_weak.*null}
+; RUN: llvm-as < %s | llvm-dis | grep "icmp.*test_weak.*null"
 ; PR1358
 @G = global i1 icmp ne (i32 (...)* @test_weak, i32 (...)* null)
 
diff --git a/test/Assembler/2007-08-06-AliasInvalid.ll b/test/Assembler/2007-08-06-AliasInvalid.ll
index 9409598..3abdc41 100644
--- a/test/Assembler/2007-08-06-AliasInvalid.ll
+++ b/test/Assembler/2007-08-06-AliasInvalid.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s > /dev/null |& grep {expected top-level entity}
+; RUN: not llvm-as < %s > /dev/null 2> %t
+; RUN: grep "expected top-level entity" %t
 ; PR1577
 
 @anInt = global i32 1 
diff --git a/test/Assembler/2007-09-29-GC.ll b/test/Assembler/2007-09-29-GC.ll
index 789a0fe..9aefd0b 100644
--- a/test/Assembler/2007-09-29-GC.ll
+++ b/test/Assembler/2007-09-29-GC.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llvm-dis | grep {@f.*gc.*shadowstack}
-; RUN: llvm-as < %s | llvm-dis | grep {@g.*gc.*java}
+; RUN: llvm-as < %s | llvm-dis | grep "@f.*gc.*shadowstack"
+; RUN: llvm-as < %s | llvm-dis | grep "@g.*gc.*java"
 
 define void @f() gc "shadowstack" {
 entry:
diff --git a/test/Assembler/2007-12-11-AddressSpaces.ll b/test/Assembler/2007-12-11-AddressSpaces.ll
index 0eb4a79..7c9b5b5 100644
--- a/test/Assembler/2007-12-11-AddressSpaces.ll
+++ b/test/Assembler/2007-12-11-AddressSpaces.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llvm-dis | grep {addrspace(33)} | count 7
-; RUN: llvm-as < %s | llvm-dis | grep {addrspace(42)} | count 2
-; RUN: llvm-as < %s | llvm-dis | grep {addrspace(66)} | count 2
-; RUN: llvm-as < %s | llvm-dis | grep {addrspace(11)} | count 6
-; RUN: llvm-as < %s | llvm-dis | grep {addrspace(22)} | count 5
+; RUN: llvm-as < %s | llvm-dis | grep "addrspace(33)" | count 7
+; RUN: llvm-as < %s | llvm-dis | grep "addrspace(42)" | count 2
+; RUN: llvm-as < %s | llvm-dis | grep "addrspace(66)" | count 2
+; RUN: llvm-as < %s | llvm-dis | grep "addrspace(11)" | count 6
+; RUN: llvm-as < %s | llvm-dis | grep "addrspace(22)" | count 5
 
 	%struct.mystruct = type { i32, i32 addrspace(33)*, i32, i32 addrspace(33)* }
 @input = weak addrspace(42) global %struct.mystruct zeroinitializer  		; <%struct.mystruct addrspace(42)*> [#uses=1]
diff --git a/test/Assembler/2008-02-18-IntPointerCrash.ll b/test/Assembler/2008-02-18-IntPointerCrash.ll
index 5a661ad..4a33c36 100644
--- a/test/Assembler/2008-02-18-IntPointerCrash.ll
+++ b/test/Assembler/2008-02-18-IntPointerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s |& grep {integer constant must have integer type}
+; RUN: not llvm-as %s 2>&1 | grep "integer constant must have integer type"
 ; PR2060
 
 define i8* @foo() {
diff --git a/test/Assembler/2008-09-02-FunctionNotes2.ll b/test/Assembler/2008-09-02-FunctionNotes2.ll
index 8a49e89..97351e2 100644
--- a/test/Assembler/2008-09-02-FunctionNotes2.ll
+++ b/test/Assembler/2008-09-02-FunctionNotes2.ll
@@ -1,5 +1,5 @@
 ; Test function notes
-; RUN: not llvm-as %s -o /dev/null |& grep "Attributes noinline alwaysinline are incompatible"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Attributes noinline alwaysinline are incompatible"
 define void @fn1() alwaysinline  noinline {
   ret void
 }
diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll
index d3d374a..fc18ce7 100644
--- a/test/Assembler/ConstantExprFold.ll
+++ b/test/Assembler/ConstantExprFold.ll
@@ -1,7 +1,7 @@
 ; This test checks to make sure that constant exprs fold in some simple 
 ; situations
 
-; RUN: llvm-as < %s | llvm-dis | not grep {(}
+; RUN: llvm-as < %s | llvm-dis | not grep "("
 
 @A = global i64 0
 
diff --git a/test/Assembler/extractvalue-invalid-idx.ll b/test/Assembler/extractvalue-invalid-idx.ll
index 9a215f7..b5a398c 100644
--- a/test/Assembler/extractvalue-invalid-idx.ll
+++ b/test/Assembler/extractvalue-invalid-idx.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& FileCheck %s
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 ; PR4170
 
 ; CHECK: invalid indices for extractvalue
diff --git a/test/Assembler/getelementptr_struct.ll b/test/Assembler/getelementptr_struct.ll
index bfebf29..0293672 100644
--- a/test/Assembler/getelementptr_struct.ll
+++ b/test/Assembler/getelementptr_struct.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s >/dev/null |& FileCheck %s
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
 ; Test the case of a incorrect indices type into struct
 
 ; CHECK: invalid getelementptr indices
diff --git a/test/Assembler/half-constprop.ll b/test/Assembler/half-constprop.ll
new file mode 100644
index 0000000..03ccdda
--- /dev/null
+++ b/test/Assembler/half-constprop.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s
+; Testing half constant propagation.
+
+define half @abc() nounwind {
+entry:
+  %a = alloca half, align 2
+  %b = alloca half, align 2
+  %.compoundliteral = alloca float, align 4
+  store half 0xH4200, half* %a, align 2
+  store half 0xH4B9A, half* %b, align 2
+  %tmp = load half* %a, align 2
+  %tmp1 = load half* %b, align 2
+  %add = fadd half %tmp, %tmp1
+; CHECK: 0xH4C8D
+  ret half %add
+}
+
diff --git a/test/Assembler/half-conv.ll b/test/Assembler/half-conv.ll
new file mode 100644
index 0000000..bf9ae57
--- /dev/null
+++ b/test/Assembler/half-conv.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s
+; Testing half to float conversion.
+
+define float @abc() nounwind {
+entry:
+  %a = alloca half, align 2
+  %.compoundliteral = alloca float, align 4
+  store half 0xH4C8D, half* %a, align 2
+  %tmp = load half* %a, align 2
+  %conv = fpext half %tmp to float
+; CHECK: 0x4032340000000000
+  ret float %conv
+}
diff --git a/test/Assembler/half.ll b/test/Assembler/half.ll
new file mode 100644
index 0000000..63ad392
--- /dev/null
+++ b/test/Assembler/half.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; Basic smoke test for half type.
+
+; CHECK: define half @halftest
+define half  @halftest(half %A0) {
+; CHECK: ret half %A0
+        ret half %A0
+}
diff --git a/test/Assembler/insertvalue-invalid-idx.ll b/test/Assembler/insertvalue-invalid-idx.ll
index 355d4e8..74642f4 100644
--- a/test/Assembler/insertvalue-invalid-idx.ll
+++ b/test/Assembler/insertvalue-invalid-idx.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& FileCheck %s
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 ; CHECK: invalid indices for insertvalue
 
diff --git a/test/Assembler/invalid_cast.ll b/test/Assembler/invalid_cast.ll
index f682835..91e81c7 100644
--- a/test/Assembler/invalid_cast.ll
+++ b/test/Assembler/invalid_cast.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& FileCheck %s
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 ; CHECK: invalid cast opcode for cast from '<4 x i64>' to '<3 x i8>'
 
diff --git a/test/Assembler/invalid_cast2.ll b/test/Assembler/invalid_cast2.ll
index a01b935..5ce9546 100644
--- a/test/Assembler/invalid_cast2.ll
+++ b/test/Assembler/invalid_cast2.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& FileCheck %s
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 ; CHECK: invalid cast opcode for cast from '<4 x i64>' to 'i8'
 
diff --git a/test/Assembler/tls-models.ll b/test/Assembler/tls-models.ll
new file mode 100644
index 0000000..42f2496
--- /dev/null
+++ b/test/Assembler/tls-models.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: @a = thread_local global i32 0
+; CHECK: @b = thread_local(localdynamic) global i32 0
+; CHECK: @c = thread_local(initialexec) global i32 0
+; CHECK: @d = thread_local(localexec) global i32 0
+
+@a = thread_local global i32 0
+@b = thread_local(localdynamic) global i32 0
+@c = thread_local(initialexec) global i32 0
+@d = thread_local(localexec) global i32 0
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index 9329286..b8eb6d3 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -84,7 +84,7 @@ let test_target () =
 (*===-- Constants ---------------------------------------------------------===*)
 
 let test_constants () =
-  (* RUN: grep {const_int.*i32.*-1} < %t.ll
+  (* RUN: grep "const_int.*i32.*-1" < %t.ll
    *)
   group "int";
   let c = const_int i32_type (-1) in
@@ -92,44 +92,44 @@ let test_constants () =
   insist (i32_type = type_of c);
   insist (is_constant c);
 
-  (* RUN: grep {const_sext_int.*i64.*-1} < %t.ll
+  (* RUN: grep "const_sext_int.*i64.*-1" < %t.ll
    *)
   group "sext int";
   let c = const_int i64_type (-1) in
   ignore (define_global "const_sext_int" c m);
   insist (i64_type = type_of c);
 
-  (* RUN: grep {const_zext_int64.*i64.*4294967295} < %t.ll
+  (* RUN: grep "const_zext_int64.*i64.*4294967295" < %t.ll
    *)
   group "zext int64";
   let c = const_of_int64 i64_type (Int64.of_string "4294967295") false in
   ignore (define_global "const_zext_int64" c m);
   insist (i64_type = type_of c);
 
-  (* RUN: grep {const_int_string.*i32.*-1} < %t.ll
+  (* RUN: grep "const_int_string.*i32.*-1" < %t.ll
    *)
   group "int string";
   let c = const_int_of_string i32_type "-1" 10 in
   ignore (define_global "const_int_string" c m);
   insist (i32_type = type_of c);
 
-  (* RUN: grep {const_string.*"cruel\\\\00world"} < %t.ll
+  (* RUN: grep 'const_string.*"cruel\00world"' < %t.ll
    *)
   group "string";
   let c = const_string context "cruel\000world" in
   ignore (define_global "const_string" c m);
   insist ((array_type i8_type 11) = type_of c);
 
-  (* RUN: grep {const_stringz.*"hi\\\\00again\\\\00"} < %t.ll
+  (* RUN: grep 'const_stringz.*"hi\00again\00"' < %t.ll
    *)
   group "stringz";
   let c = const_stringz context "hi\000again" in
   ignore (define_global "const_stringz" c m);
   insist ((array_type i8_type 9) = type_of c);
 
-  (* RUN: grep {const_single.*2.75} < %t.ll
-   * RUN: grep {const_double.*3.1459} < %t.ll
-   * RUN: grep {const_double_string.*1.25} < %t.ll
+  (* RUN: grep "const_single.*2.75" < %t.ll
+   * RUN: grep "const_double.*3.1459" < %t.ll
+   * RUN: grep "const_double_string.*1.25" < %t.ll
    *)
   begin group "real";
     let cs = const_float float_type 2.75 in
@@ -150,14 +150,14 @@ let test_constants () =
   let three = const_int i32_type 3 in
   let four = const_int i32_type 4 in
   
-  (* RUN: grep {const_array.*\\\[i32 3, i32 4\\\]} < %t.ll
+  (* RUN: grep "const_array.*[i32 3, i32 4]" < %t.ll
    *)
   group "array";
   let c = const_array i32_type [| three; four |] in
   ignore (define_global "const_array" c m);
   insist ((array_type i32_type 2) = (type_of c));
   
-  (* RUN: grep {const_vector.*<i16 1, i16 2.*>} < %t.ll
+  (* RUN: grep "const_vector.*<i16 1, i16 2.*>" < %t.ll
    *)
   group "vector";
   let c = const_vector [| one; two; one; two;
@@ -165,7 +165,7 @@ let test_constants () =
   ignore (define_global "const_vector" c m);
   insist ((vector_type i16_type 8) = (type_of c));
 
-  (* RUN: grep {const_structure.*.i16 1, i16 2, i32 3, i32 4} < %t.ll
+  (* RUN: grep "const_structure.*.i16 1, i16 2, i32 3, i32 4" < %t.ll
    *)
   group "structure";
   let c = const_struct context [| one; two; three; four |] in
@@ -173,27 +173,27 @@ let test_constants () =
   insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
         = (type_of c));
 
-  (* RUN: grep {const_null.*zeroinit} < %t.ll
+  (* RUN: grep "const_null.*zeroinit" < %t.ll
    *)
   group "null";
   let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type;
                                                     double_type |]) in
   ignore (define_global "const_null" c m);
   
-  (* RUN: grep {const_all_ones.*-1} < %t.ll
+  (* RUN: grep "const_all_ones.*-1" < %t.ll
    *)
   group "all ones";
   let c = const_all_ones i64_type in
   ignore (define_global "const_all_ones" c m);
 
   group "pointer null"; begin
-    (* RUN: grep {const_pointer_null = global i64\\* null} < %t.ll
+    (* RUN: grep "const_pointer_null = global i64* null" < %t.ll
      *)
     let c = const_pointer_null (pointer_type i64_type) in
     ignore (define_global "const_pointer_null" c m);
   end;
   
-  (* RUN: grep {const_undef.*undef} < %t.ll
+  (* RUN: grep "const_undef.*undef" < %t.ll
    *)
   group "undef";
   let c = undef i1_type in
@@ -202,35 +202,35 @@ let test_constants () =
   insist (is_undef c);
   
   group "constant arithmetic";
-  (* RUN: grep {@const_neg = global i64 sub} < %t.ll
-   * RUN: grep {@const_nsw_neg = global i64 sub nsw } < %t.ll
-   * RUN: grep {@const_nuw_neg = global i64 sub nuw } < %t.ll
-   * RUN: grep {@const_fneg = global double fsub } < %t.ll
-   * RUN: grep {@const_not = global i64 xor } < %t.ll
-   * RUN: grep {@const_add = global i64 add } < %t.ll
-   * RUN: grep {@const_nsw_add = global i64 add nsw } < %t.ll
-   * RUN: grep {@const_nuw_add = global i64 add nuw } < %t.ll
-   * RUN: grep {@const_fadd = global double fadd } < %t.ll
-   * RUN: grep {@const_sub = global i64 sub } < %t.ll
-   * RUN: grep {@const_nsw_sub = global i64 sub nsw } < %t.ll
-   * RUN: grep {@const_nuw_sub = global i64 sub nuw } < %t.ll
-   * RUN: grep {@const_fsub = global double fsub } < %t.ll
-   * RUN: grep {@const_mul = global i64 mul } < %t.ll
-   * RUN: grep {@const_nsw_mul = global i64 mul nsw } < %t.ll
-   * RUN: grep {@const_nuw_mul = global i64 mul nuw } < %t.ll
-   * RUN: grep {@const_fmul = global double fmul } < %t.ll
-   * RUN: grep {@const_udiv = global i64 udiv } < %t.ll
-   * RUN: grep {@const_sdiv = global i64 sdiv } < %t.ll
-   * RUN: grep {@const_exact_sdiv = global i64 sdiv exact } < %t.ll
-   * RUN: grep {@const_fdiv = global double fdiv } < %t.ll
-   * RUN: grep {@const_urem = global i64 urem } < %t.ll
-   * RUN: grep {@const_srem = global i64 srem } < %t.ll
-   * RUN: grep {@const_frem = global double frem } < %t.ll
-   * RUN: grep {@const_and = global i64 and } < %t.ll
-   * RUN: grep {@const_or = global i64 or } < %t.ll
-   * RUN: grep {@const_xor = global i64 xor } < %t.ll
-   * RUN: grep {@const_icmp = global i1 icmp sle } < %t.ll
-   * RUN: grep {@const_fcmp = global i1 fcmp ole } < %t.ll
+  (* RUN: grep "@const_neg = global i64 sub" < %t.ll
+   * RUN: grep "@const_nsw_neg = global i64 sub nsw " < %t.ll
+   * RUN: grep "@const_nuw_neg = global i64 sub nuw " < %t.ll
+   * RUN: grep "@const_fneg = global double fsub " < %t.ll
+   * RUN: grep "@const_not = global i64 xor " < %t.ll
+   * RUN: grep "@const_add = global i64 add " < %t.ll
+   * RUN: grep "@const_nsw_add = global i64 add nsw " < %t.ll
+   * RUN: grep "@const_nuw_add = global i64 add nuw " < %t.ll
+   * RUN: grep "@const_fadd = global double fadd " < %t.ll
+   * RUN: grep "@const_sub = global i64 sub " < %t.ll
+   * RUN: grep "@const_nsw_sub = global i64 sub nsw " < %t.ll
+   * RUN: grep "@const_nuw_sub = global i64 sub nuw " < %t.ll
+   * RUN: grep "@const_fsub = global double fsub " < %t.ll
+   * RUN: grep "@const_mul = global i64 mul " < %t.ll
+   * RUN: grep "@const_nsw_mul = global i64 mul nsw " < %t.ll
+   * RUN: grep "@const_nuw_mul = global i64 mul nuw " < %t.ll
+   * RUN: grep "@const_fmul = global double fmul " < %t.ll
+   * RUN: grep "@const_udiv = global i64 udiv " < %t.ll
+   * RUN: grep "@const_sdiv = global i64 sdiv " < %t.ll
+   * RUN: grep "@const_exact_sdiv = global i64 sdiv exact " < %t.ll
+   * RUN: grep "@const_fdiv = global double fdiv " < %t.ll
+   * RUN: grep "@const_urem = global i64 urem " < %t.ll
+   * RUN: grep "@const_srem = global i64 srem " < %t.ll
+   * RUN: grep "@const_frem = global double frem " < %t.ll
+   * RUN: grep "@const_and = global i64 and " < %t.ll
+   * RUN: grep "@const_or = global i64 or " < %t.ll
+   * RUN: grep "@const_xor = global i64 xor " < %t.ll
+   * RUN: grep "@const_icmp = global i1 icmp sle " < %t.ll
+   * RUN: grep "@const_fcmp = global i1 fcmp ole " < %t.ll
    *)
   let void_ptr = pointer_type i8_type in
   let five = const_int i64_type 5 in
@@ -269,18 +269,18 @@ let test_constants () =
   ignore (define_global "const_fcmp" (const_fcmp Fcmp.Ole ffoldbomb ffive) m);
   
   group "constant casts";
-  (* RUN: grep {const_trunc.*trunc} < %t.ll
-   * RUN: grep {const_sext.*sext} < %t.ll
-   * RUN: grep {const_zext.*zext} < %t.ll
-   * RUN: grep {const_fptrunc.*fptrunc} < %t.ll
-   * RUN: grep {const_fpext.*fpext} < %t.ll
-   * RUN: grep {const_uitofp.*uitofp} < %t.ll
-   * RUN: grep {const_sitofp.*sitofp} < %t.ll
-   * RUN: grep {const_fptoui.*fptoui} < %t.ll
-   * RUN: grep {const_fptosi.*fptosi} < %t.ll
-   * RUN: grep {const_ptrtoint.*ptrtoint} < %t.ll
-   * RUN: grep {const_inttoptr.*inttoptr} < %t.ll
-   * RUN: grep {const_bitcast.*bitcast} < %t.ll
+  (* RUN: grep "const_trunc.*trunc" < %t.ll
+   * RUN: grep "const_sext.*sext" < %t.ll
+   * RUN: grep "const_zext.*zext" < %t.ll
+   * RUN: grep "const_fptrunc.*fptrunc" < %t.ll
+   * RUN: grep "const_fpext.*fpext" < %t.ll
+   * RUN: grep "const_uitofp.*uitofp" < %t.ll
+   * RUN: grep "const_sitofp.*sitofp" < %t.ll
+   * RUN: grep "const_fptoui.*fptoui" < %t.ll
+   * RUN: grep "const_fptosi.*fptosi" < %t.ll
+   * RUN: grep "const_ptrtoint.*ptrtoint" < %t.ll
+   * RUN: grep "const_inttoptr.*inttoptr" < %t.ll
+   * RUN: grep "const_bitcast.*bitcast" < %t.ll
    *)
   let i128_type = integer_type context 128 in
   ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five)
@@ -302,12 +302,12 @@ let test_constants () =
   ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m);
   
   group "misc constants";
-  (* RUN: grep {const_size_of.*getelementptr.*null} < %t.ll
-   * RUN: grep {const_gep.*getelementptr} < %t.ll
-   * RUN: grep {const_select.*select} < %t.ll
-   * RUN: grep {const_extractelement.*extractelement} < %t.ll
-   * RUN: grep {const_insertelement.*insertelement} < %t.ll
-   * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll
+  (* RUN: grep "const_size_of.*getelementptr.*null" < %t.ll
+   * RUN: grep "const_gep.*getelementptr" < %t.ll
+   * RUN: grep "const_select.*select" < %t.ll
+   * RUN: grep "const_extractelement.*extractelement" < %t.ll
+   * RUN: grep "const_insertelement.*insertelement" < %t.ll
+   * RUN: grep "const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>" < %t.ll
    *)
   ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m);
   ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m);
@@ -356,7 +356,7 @@ let test_global_values () =
   let (++) x f = f x; x in
   let zero32 = const_null i32_type in
 
-  (* RUN: grep {GVal01} < %t.ll
+  (* RUN: grep "GVal01" < %t.ll
    *)
   group "naming";
   let g = define_global "TEMPORARY" zero32 m in
@@ -364,28 +364,28 @@ let test_global_values () =
   set_value_name "GVal01" g;
   insist ("GVal01" = value_name g);
 
-  (* RUN: grep {GVal02.*linkonce} < %t.ll
+  (* RUN: grep "GVal02.*linkonce" < %t.ll
    *)
   group "linkage";
   let g = define_global "GVal02" zero32 m ++
           set_linkage Linkage.Link_once in
   insist (Linkage.Link_once = linkage g);
 
-  (* RUN: grep {GVal03.*Hanalei} < %t.ll
+  (* RUN: grep "GVal03.*Hanalei" < %t.ll
    *)
   group "section";
   let g = define_global "GVal03" zero32 m ++
           set_section "Hanalei" in
   insist ("Hanalei" = section g);
   
-  (* RUN: grep {GVal04.*hidden} < %t.ll
+  (* RUN: grep "GVal04.*hidden" < %t.ll
    *)
   group "visibility";
   let g = define_global "GVal04" zero32 m ++
           set_visibility Visibility.Hidden in
   insist (Visibility.Hidden = visibility g);
   
-  (* RUN: grep {GVal05.*align 128} < %t.ll
+  (* RUN: grep "GVal05.*align 128" < %t.ll
    *)
   group "alignment";
   let g = define_global "GVal05" zero32 m ++
@@ -400,7 +400,7 @@ let test_global_variables () =
   let fourty_two32 = const_int i32_type 42 in
 
   group "declarations"; begin
-    (* RUN: grep {GVar01.*external} < %t.ll
+    (* RUN: grep "GVar01.*external" < %t.ll
      *)
     insist (None == lookup_global "GVar01" m);
     let g = declare_global i32_type "GVar01" m in
@@ -422,8 +422,8 @@ let test_global_variables () =
   end;
   
   group "definitions"; begin
-    (* RUN: grep {GVar02.*42} < %t.ll
-     * RUN: grep {GVar03.*42} < %t.ll
+    (* RUN: grep "GVar02.*42" < %t.ll
+     * RUN: grep "GVar03.*42" < %t.ll
      *)
     let g = define_global "GVar02" fourty_two32 m in
     let g2 = declare_global i32_type "GVar03" m ++
@@ -440,20 +440,20 @@ let test_global_variables () =
     insist ((global_initializer g) == (global_initializer g2));
   end;
 
-  (* RUN: grep {GVar04.*thread_local} < %t.ll
+  (* RUN: grep "GVar04.*thread_local" < %t.ll
    *)
   group "threadlocal";
   let g = define_global "GVar04" fourty_two32 m ++
           set_thread_local true in
   insist (is_thread_local g);
 
-  (* RUN: grep -v {GVar05} < %t.ll
+  (* RUN: grep -v "GVar05" < %t.ll
    *)
   group "delete";
   let g = define_global "GVar05" fourty_two32 m in
   delete_global g;
 
-  (* RUN: grep -v {ConstGlobalVar.*constant} < %t.ll
+  (* RUN: grep -v "ConstGlobalVar.*constant" < %t.ll
    *)
   group "constant";
   let g = define_global "ConstGlobalVar" fourty_two32 m in
@@ -542,7 +542,7 @@ let test_users () =
 (*===-- Aliases -----------------------------------------------------------===*)
 
 let test_aliases () =
-  (* RUN: grep {@alias = alias i32\\* @aliasee} < %t.ll
+  (* RUN: grep "@alias = alias i32* @aliasee" < %t.ll
    *)
   let v = declare_global i32_type "aliasee" m in
   ignore (add_alias m (pointer_type i32_type) v "alias")
@@ -554,7 +554,7 @@ let test_functions () =
   let ty = function_type i32_type [| i32_type; i64_type |] in
   let ty2 = function_type i8_type [| i8_type; i64_type |] in
   
-  (* RUN: grep {declare i32 @Fn1\(i32, i64\)} < %t.ll
+  (* RUN: grep "declare i32 @Fn1\(i32, i64\)" < %t.ll
    *)
   begin group "declare";
     insist (None = lookup_function "Fn1" m);
@@ -570,13 +570,13 @@ let test_functions () =
     insist (m == global_parent fn)
   end;
   
-  (* RUN: grep -v {Fn2} < %t.ll
+  (* RUN: grep -v "Fn2" < %t.ll
    *)
   group "delete";
   let fn = declare_function "Fn2" ty m in
   delete_function fn;
   
-  (* RUN: grep {define.*Fn3} < %t.ll
+  (* RUN: grep "define.*Fn3" < %t.ll
    *)
   group "define";
   let fn = define_function "Fn3" ty m in
@@ -584,7 +584,7 @@ let test_functions () =
   insist (1 = Array.length (basic_blocks fn));
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
-  (* RUN: grep {define.*Fn4.*Param1.*Param2} < %t.ll
+  (* RUN: grep "define.*Fn4.*Param1.*Param2" < %t.ll
    *)
   group "params";
   let fn = define_function "Fn4" ty m in
@@ -598,7 +598,7 @@ let test_functions () =
   set_value_name "Param2" params.(1);
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
-  (* RUN: grep {fastcc.*Fn5} < %t.ll
+  (* RUN: grep "fastcc.*Fn5" < %t.ll
    *)
   group "callconv";
   let fn = define_function "Fn5" ty m in
@@ -608,7 +608,7 @@ let test_functions () =
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
   begin group "gc";
-    (* RUN: grep {Fn6.*gc.*shadowstack} < %t.ll
+    (* RUN: grep "Fn6.*gc.*shadowstack" < %t.ll
      *)
     let fn = define_function "Fn6" ty m in
     insist (None = gc fn);
@@ -694,7 +694,7 @@ let test_params () =
 let test_basic_blocks () =
   let ty = function_type void_type [| |] in
   
-  (* RUN: grep {Bb1} < %t.ll
+  (* RUN: grep "Bb1" < %t.ll
    *)
   group "entry";
   let fn = declare_function "X" ty m in
@@ -825,7 +825,7 @@ let test_builder () =
   
   group "ret void";
   begin
-    (* RUN: grep {ret void} < %t.ll
+    (* RUN: grep "ret void" < %t.ll
      *)
     let fty = function_type void_type [| |] in
     let fn = declare_function "X6" fty m in
@@ -835,7 +835,7 @@ let test_builder () =
 
   group "ret aggregate";
   begin
-      (* RUN: grep {ret \{ i8, i64 \} \{ i8 4, i64 5 \}} < %t.ll
+      (* RUN: grep "ret { i8, i64 } { i8 4, i64 5 }" < %t.ll
        *)
       let sty = struct_type context [| i8_type; i64_type |] in
       let fty = function_type sty [| |] in
@@ -895,14 +895,14 @@ let test_builder () =
   end;
 
   group "ret"; begin
-    (* RUN: grep {ret.*P1} < %t.ll
+    (* RUN: grep "ret.*P1" < %t.ll
      *)
     let ret = build_ret p1 atentry in
     position_before ret atentry
   end;
   
   group "br"; begin
-    (* RUN: grep {br.*Bb02} < %t.ll
+    (* RUN: grep "br.*Bb02" < %t.ll
      *)
     let bb02 = append_block context "Bb02" fn in
     let b = builder_at_end context bb02 in
@@ -910,7 +910,7 @@ let test_builder () =
   end;
   
   group "cond_br"; begin
-    (* RUN: grep {br.*build_br.*Bb03.*Bb00} < %t.ll
+    (* RUN: grep "br.*build_br.*Bb03.*Bb00" < %t.ll
      *)
     let bb03 = append_block context "Bb03" fn in
     let b = builder_at_end context bb03 in
@@ -919,8 +919,8 @@ let test_builder () =
   end;
   
   group "switch"; begin
-    (* RUN: grep {switch.*P1.*SwiBlock3} < %t.ll
-     * RUN: grep {2,.*SwiBlock2} < %t.ll
+    (* RUN: grep "switch.*P1.*SwiBlock3" < %t.ll
+     * RUN: grep "2,.*SwiBlock2" < %t.ll
      *)
     let bb1 = append_block context "SwiBlock1" fn in
     let bb2 = append_block context "SwiBlock2" fn in
@@ -934,9 +934,9 @@ let test_builder () =
   end;
 
   group "malloc/free"; begin
-      (* RUN: grep {call.*@malloc(i32 ptrtoint} < %t.ll
-       * RUN: grep {call.*@free(i8\*} < %t.ll
-       * RUN: grep {call.*@malloc(i32 %} < %t.ll
+      (* RUN: grep "call.*@malloc(i32 ptrtoint" < %t.ll
+       * RUN: grep "call.*@free(i8*" < %t.ll
+       * RUN: grep "call.*@malloc(i32 %" < %t.ll
        *)
       let bb1 = append_block context "MallocBlock1" fn in
       let m1 = (build_malloc (pointer_type i32_type) "m1"
@@ -947,7 +947,7 @@ let test_builder () =
   end;
 
   group "indirectbr"; begin
-    (* RUN: grep {indirectbr i8\\* blockaddress(@X7, %IBRBlock2), \\\[label %IBRBlock2, label %IBRBlock3\\\]} < %t.ll
+    (* RUN: grep "indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]" < %t.ll
      *)
     let bb1 = append_block context "IBRBlock1" fn in
 
@@ -964,8 +964,8 @@ let test_builder () =
   end;
   
   group "invoke"; begin
-    (* RUN: grep {build_invoke.*invoke.*P1.*P2} < %t.ll
-     * RUN: grep {to.*Bb04.*unwind.*Bblpad} < %t.ll
+    (* RUN: grep "build_invoke.*invoke.*P1.*P2" < %t.ll
+     * RUN: grep "to.*Bb04.*unwind.*Bblpad" < %t.ll
      *)
     let bb04 = append_block context "Bb04" fn in
     let b = builder_at_end context bb04 in
@@ -973,7 +973,7 @@ let test_builder () =
   end;
   
   group "unreachable"; begin
-    (* RUN: grep {unreachable} < %t.ll
+    (* RUN: grep "unreachable" < %t.ll
      *)
     let bb06 = append_block context "Bb06" fn in
     let b = builder_at_end context bb06 in
@@ -984,36 +984,36 @@ let test_builder () =
     let bb07 = append_block context "Bb07" fn in
     let b = builder_at_end context bb07 in
     
-    (* RUN: grep {%build_add = add i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nsw_add = add nsw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nuw_add = add nuw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_fadd = fadd float %F1, %F2} < %t.ll
-     * RUN: grep {%build_sub = sub i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nsw_sub = sub nsw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nuw_sub = sub nuw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_fsub = fsub float %F1, %F2} < %t.ll
-     * RUN: grep {%build_mul = mul i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nsw_mul = mul nsw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_nuw_mul = mul nuw i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_fmul = fmul float %F1, %F2} < %t.ll
-     * RUN: grep {%build_udiv = udiv i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_sdiv = sdiv i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_exact_sdiv = sdiv exact i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_fdiv = fdiv float %F1, %F2} < %t.ll
-     * RUN: grep {%build_urem = urem i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_srem = srem i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_frem = frem float %F1, %F2} < %t.ll
-     * RUN: grep {%build_shl = shl i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_lshl = lshr i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_ashl = ashr i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_and = and i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_or = or i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_xor = xor i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_neg = sub i32 0, %P1} < %t.ll
-     * RUN: grep {%build_nsw_neg = sub nsw i32 0, %P1} < %t.ll
-     * RUN: grep {%build_nuw_neg = sub nuw i32 0, %P1} < %t.ll
-     * RUN: grep {%build_fneg = fsub float .*0.*, %F1} < %t.ll
-     * RUN: grep {%build_not = xor i32 %P1, -1} < %t.ll
+    (* RUN: grep "%build_add = add i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nsw_add = add nsw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nuw_add = add nuw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_fadd = fadd float %F1, %F2" < %t.ll
+     * RUN: grep "%build_sub = sub i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nsw_sub = sub nsw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nuw_sub = sub nuw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_fsub = fsub float %F1, %F2" < %t.ll
+     * RUN: grep "%build_mul = mul i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nsw_mul = mul nsw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_nuw_mul = mul nuw i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_fmul = fmul float %F1, %F2" < %t.ll
+     * RUN: grep "%build_udiv = udiv i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_sdiv = sdiv i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_exact_sdiv = sdiv exact i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_fdiv = fdiv float %F1, %F2" < %t.ll
+     * RUN: grep "%build_urem = urem i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_srem = srem i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_frem = frem float %F1, %F2" < %t.ll
+     * RUN: grep "%build_shl = shl i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_lshl = lshr i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_ashl = ashr i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_and = and i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_or = or i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_xor = xor i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_neg = sub i32 0, %P1" < %t.ll
+     * RUN: grep "%build_nsw_neg = sub nsw i32 0, %P1" < %t.ll
+     * RUN: grep "%build_nuw_neg = sub nuw i32 0, %P1" < %t.ll
+     * RUN: grep "%build_fneg = fsub float .*0.*, %F1" < %t.ll
+     * RUN: grep "%build_not = xor i32 %P1, -1" < %t.ll
      *)
     ignore (build_add p1 p2 "build_add" b);
     ignore (build_nsw_add p1 p2 "build_nsw_add" b);
@@ -1052,13 +1052,13 @@ let test_builder () =
     let bb08 = append_block context "Bb08" fn in
     let b = builder_at_end context bb08 in
 
-    (* RUN: grep {%build_alloca = alloca i32} < %t.ll
-     * RUN: grep {%build_array_alloca = alloca i32, i32 %P2} < %t.ll
-     * RUN: grep {%build_load = load i32\\* %build_array_alloca} < %t.ll
-     * RUN: grep {store i32 %P2, i32\\* %build_alloca} < %t.ll
-     * RUN: grep {%build_gep = getelementptr i32\\* %build_array_alloca, i32 %P2} < %t.ll
-     * RUN: grep {%build_in_bounds_gep = getelementptr inbounds i32\\* %build_array_alloca, i32 %P2} < %t.ll
-     * RUN: grep {%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1} < %t.ll
+    (* RUN: grep "%build_alloca = alloca i32" < %t.ll
+     * RUN: grep "%build_array_alloca = alloca i32, i32 %P2" < %t.ll
+     * RUN: grep "%build_load = load i32* %build_array_alloca" < %t.ll
+     * RUN: grep "store i32 %P2, i32* %build_alloca" < %t.ll
+     * RUN: grep "%build_gep = getelementptr i32* %build_array_alloca, i32 %P2" < %t.ll
+     * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2" < %t.ll
+     * RUN: grep "%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1" < %t.ll
      *)
     let alloca = build_alloca i32_type "build_alloca" b in
     let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in
@@ -1090,30 +1090,30 @@ let test_builder () =
   group "casts"; begin
     let void_ptr = pointer_type i8_type in
     
-    (* RUN: grep {%build_trunc = trunc i32 %P1 to i8} < %t.ll
-     * RUN: grep {%build_trunc2 = trunc i32 %P1 to i8} < %t.ll
-     * RUN: grep {%build_trunc3 = trunc i32 %P1 to i8} < %t.ll
-     * RUN: grep {%build_zext = zext i8 %build_trunc to i32} < %t.ll
-     * RUN: grep {%build_zext2 = zext i8 %build_trunc to i32} < %t.ll
-     * RUN: grep {%build_sext = sext i32 %build_zext to i64} < %t.ll
-     * RUN: grep {%build_sext2 = sext i32 %build_zext to i64} < %t.ll
-     * RUN: grep {%build_sext3 = sext i32 %build_zext to i64} < %t.ll
-     * RUN: grep {%build_uitofp = uitofp i64 %build_sext to float} < %t.ll
-     * RUN: grep {%build_sitofp = sitofp i32 %build_zext to double} < %t.ll
-     * RUN: grep {%build_fptoui = fptoui float %build_uitofp to i32} < %t.ll
-     * RUN: grep {%build_fptosi = fptosi double %build_sitofp to i64} < %t.ll
-     * RUN: grep {%build_fptrunc = fptrunc double %build_sitofp to float} < %t.ll
-     * RUN: grep {%build_fptrunc2 = fptrunc double %build_sitofp to float} < %t.ll
-     * RUN: grep {%build_fpext = fpext float %build_fptrunc to double} < %t.ll
-     * RUN: grep {%build_fpext2 = fpext float %build_fptrunc to double} < %t.ll
-     * RUN: grep {%build_inttoptr = inttoptr i32 %P1 to i8\\*} < %t.ll
-     * RUN: grep {%build_ptrtoint = ptrtoint i8\\* %build_inttoptr to i64} < %t.ll
-     * RUN: grep {%build_ptrtoint2 = ptrtoint i8\\* %build_inttoptr to i64} < %t.ll
-     * RUN: grep {%build_bitcast = bitcast i64 %build_ptrtoint to double} < %t.ll
-     * RUN: grep {%build_bitcast2 = bitcast i64 %build_ptrtoint to double} < %t.ll
-     * RUN: grep {%build_bitcast3 = bitcast i64 %build_ptrtoint to double} < %t.ll
-     * RUN: grep {%build_bitcast4 = bitcast i64 %build_ptrtoint to double} < %t.ll
-     * RUN: grep {%build_pointercast = bitcast i8\\* %build_inttoptr to i16\\*} < %t.ll
+    (* RUN: grep "%build_trunc = trunc i32 %P1 to i8" < %t.ll
+     * RUN: grep "%build_trunc2 = trunc i32 %P1 to i8" < %t.ll
+     * RUN: grep "%build_trunc3 = trunc i32 %P1 to i8" < %t.ll
+     * RUN: grep "%build_zext = zext i8 %build_trunc to i32" < %t.ll
+     * RUN: grep "%build_zext2 = zext i8 %build_trunc to i32" < %t.ll
+     * RUN: grep "%build_sext = sext i32 %build_zext to i64" < %t.ll
+     * RUN: grep "%build_sext2 = sext i32 %build_zext to i64" < %t.ll
+     * RUN: grep "%build_sext3 = sext i32 %build_zext to i64" < %t.ll
+     * RUN: grep "%build_uitofp = uitofp i64 %build_sext to float" < %t.ll
+     * RUN: grep "%build_sitofp = sitofp i32 %build_zext to double" < %t.ll
+     * RUN: grep "%build_fptoui = fptoui float %build_uitofp to i32" < %t.ll
+     * RUN: grep "%build_fptosi = fptosi double %build_sitofp to i64" < %t.ll
+     * RUN: grep "%build_fptrunc = fptrunc double %build_sitofp to float" < %t.ll
+     * RUN: grep "%build_fptrunc2 = fptrunc double %build_sitofp to float" < %t.ll
+     * RUN: grep "%build_fpext = fpext float %build_fptrunc to double" < %t.ll
+     * RUN: grep "%build_fpext2 = fpext float %build_fptrunc to double" < %t.ll
+     * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8*" < %t.ll
+     * RUN: grep "%build_ptrtoint = ptrtoint i8* %build_inttoptr to i64" < %t.ll
+     * RUN: grep "%build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64" < %t.ll
+     * RUN: grep "%build_bitcast = bitcast i64 %build_ptrtoint to double" < %t.ll
+     * RUN: grep "%build_bitcast2 = bitcast i64 %build_ptrtoint to double" < %t.ll
+     * RUN: grep "%build_bitcast3 = bitcast i64 %build_ptrtoint to double" < %t.ll
+     * RUN: grep "%build_bitcast4 = bitcast i64 %build_ptrtoint to double" < %t.ll
+     * RUN: grep "%build_pointercast = bitcast i8* %build_inttoptr to i16*" < %t.ll
      *)
     let inst28 = build_trunc p1 i8_type "build_trunc" atentry in
     let inst29 = build_zext inst28 i32_type "build_zext" atentry in
@@ -1143,13 +1143,13 @@ let test_builder () =
   end;
   
   group "comparisons"; begin
-    (* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll
-     * RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll
-     * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll
-     * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll
-     * RUN: grep {%build_is_null.*= icmp eq.*%X0,.*null} < %t.ll
-     * RUN: grep {%build_is_not_null = icmp ne i8\\* %X1, null} < %t.ll
-     * RUN: grep {%build_ptrdiff} < %t.ll
+    (* RUN: grep "%build_icmp_ne = icmp ne i32 %P1, %P2" < %t.ll
+     * RUN: grep "%build_icmp_sle = icmp sle i32 %P2, %P1" < %t.ll
+     * RUN: grep "%build_fcmp_false = fcmp false float %F1, %F2" < %t.ll
+     * RUN: grep "%build_fcmp_true = fcmp true float %F2, %F1" < %t.ll
+     * RUN: grep "%build_is_null.*= icmp eq.*%X0,.*null" < %t.ll
+     * RUN: grep "%build_is_not_null = icmp ne i8* %X1, null" < %t.ll
+     * RUN: grep "%build_ptrdiff" < %t.ll
      *)
     ignore (build_icmp Icmp.Ne    p1 p2 "build_icmp_ne" atentry);
     ignore (build_icmp Icmp.Sle   p2 p1 "build_icmp_sle" atentry);
@@ -1165,14 +1165,14 @@ let test_builder () =
   end;
   
   group "miscellaneous"; begin
-    (* RUN: grep {%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)} < %t.ll
-     * RUN: grep {%build_select = select i1 %build_icmp, i32 %P1, i32 %P2} < %t.ll
-     * RUN: grep {%build_va_arg = va_arg i8\\*\\* null, i32} < %t.ll
-     * RUN: grep {%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2} < %t.ll
-     * RUN: grep {%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2} < %t.ll
-     * RUN: grep {%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>} < %t.ll
-     * RUN: grep {%build_insertvalue0 = insertvalue.*%bl, i32 1, 0} < %t.ll
-     * RUN: grep {%build_extractvalue = extractvalue.*%build_insertvalue1, 1} < %t.ll
+    (* RUN: grep "%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)" < %t.ll
+     * RUN: grep "%build_select = select i1 %build_icmp, i32 %P1, i32 %P2" < %t.ll
+     * RUN: grep "%build_va_arg = va_arg i8** null, i32" < %t.ll
+     * RUN: grep "%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2" < %t.ll
+     * RUN: grep "%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2" < %t.ll
+     * RUN: grep "%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>" < %t.ll
+     * RUN: grep "%build_insertvalue0 = insertvalue.*%bl, i32 1, 0" < %t.ll
+     * RUN: grep "%build_extractvalue = extractvalue.*%build_insertvalue1, 1" < %t.ll
      *)
     let ci = build_call fn [| p2; p1 |] "build_call" atentry in
     insist (CallConv.c = instruction_call_conv ci);
@@ -1215,8 +1215,8 @@ let test_builder () =
   end;
 
   group "metadata"; begin
-    (* RUN: grep {%metadata = add i32 %P1, %P2, !test !0} < %t.ll
-     * RUN: grep {!0 = metadata !\{i32 1, metadata !"metadata test"\}} < %t.ll
+    (* RUN: grep '%metadata = add i32 %P1, %P2, !test !0' < %t.ll
+     * RUN: grep '!0 = metadata !{i32 1, metadata !"metadata test"}' < %t.ll
      *)
     let i = build_add p1 p2 "metadata" atentry in
     insist ((has_metadata i) = false);
@@ -1240,8 +1240,8 @@ let test_builder () =
   end;
 
   group "dbg"; begin
-    (* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll
-     * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll
+    (* RUN: grep "%dbg = add i32 %P1, %P2, !dbg !1" < %t.ll
+     * RUN: grep "!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}" < %t.ll
      *)
     insist ((current_debug_location atentry) = None);
 
@@ -1261,7 +1261,7 @@ let test_builder () =
   end;
   
   group "phi"; begin
-    (* RUN: grep {PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2} < %t.ll
+    (* RUN: grep "PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2" < %t.ll
      *)
     let b1 = append_block context "PhiBlock1" fn in
     let b2 = append_block context "PhiBlock2" fn in
diff --git a/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll b/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll
new file mode 100644
index 0000000..583b9a8
--- /dev/null
+++ b/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll
@@ -0,0 +1,33 @@
+; RUN: rm -f %t.bc
+; RUN: rm -f %t.ll
+; RUN: rm -f %t2.bc
+; RUN: rm -f %t2.ll
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | tail -n +2 > %t.ll
+; RUN: llvm-as %t.ll -o %t2.bc
+; RUN: llvm-dis %t2.bc -o - | tail -n +2 > %t2.ll
+; RUN: llvm-diff %t.ll %t2.ll
+
+define void @test() {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+  switch i32 %c, label %exit [
+      i32 1, label %exit
+      i32 2, label %exit
+  ]
+exit:
+  ret void
+}
+define void @test_wide() {
+  %mem = alloca i256
+  store i256 2, i256* %mem
+  %c = load i256* %mem
+  switch i256 %c, label %exit [
+      i256 123456789012345678901234567890, label %exit
+      i256 2, label %exit
+  ]
+exit:
+  ret void
+}
+
diff --git a/test/Bitcode/arm32_neon_vcnt_upgrade.ll b/test/Bitcode/arm32_neon_vcnt_upgrade.ll
new file mode 100644
index 0000000..10b9284
--- /dev/null
+++ b/test/Bitcode/arm32_neon_vcnt_upgrade.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; Tests vclz and vcnt
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: @vclz16
+        %tmp1 = load <4 x i16>* %A
+        %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+;CHECK: {{call.*@llvm.ctlz.v4i16\(<4 x i16>.*, i1 false}}
+        ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: @vcnt8
+        %tmp1 = load <8 x i8>* %A
+        %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
+;CHECK: call <8 x i8> @llvm.ctpop.v8i8(<8 x i8>
+        ret <8 x i8> %tmp2
+}
+
+declare <4 x i16>  @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
new file mode 100644
index 0000000..502e967
--- /dev/null
+++ b/test/Bitcode/attributes.ll
@@ -0,0 +1,164 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; PR12696
+
+define void @f1(i8 zeroext)
+; CHECK: define void @f1(i8 zeroext)
+{
+        ret void;
+}
+
+define void @f2(i8 signext)
+; CHECK: define void @f2(i8 signext)
+{
+        ret void;
+}
+
+define void @f3() noreturn
+; CHECK: define void @f3() noreturn
+{
+        ret void;
+}
+
+define void @f4(i8 inreg)
+; CHECK: define void @f4(i8 inreg)
+{
+        ret void;
+}
+
+define void @f5(i8* sret)
+; CHECK: define void @f5(i8* sret)
+{
+        ret void;
+}
+
+define void @f6() nounwind
+; CHECK: define void @f6() nounwind
+{
+        ret void;
+}
+
+define void @f7(i8* noalias)
+; CHECK: define void @f7(i8* noalias)
+{
+        ret void;
+}
+
+define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval)
+{
+        ret void;
+}
+
+define void @f9(i8* nest)
+; CHECK: define void @f9(i8* nest)
+{
+        ret void;
+}
+
+define void @f10() readnone
+; CHECK: define void @f10() readnone
+{
+        ret void;
+}
+
+define void @f11() readonly
+; CHECK: define void @f11() readonly
+{
+        ret void;
+}
+
+define void @f12() noinline
+; CHECK: define void @f12() noinline
+{
+        ret void;
+}
+
+define void @f13() alwaysinline
+; CHECK: define void @f13() alwaysinline
+{
+        ret void;
+}
+
+define void @f14() optsize
+; CHECK: define void @f14() optsize
+{
+        ret void;
+}
+
+define void @f15() ssp
+; CHECK: define void @f15() ssp
+{
+        ret void;
+}
+
+define void @f16() sspreq
+; CHECK: define void @f16() sspreq
+{
+        ret void;
+}
+
+define void @f17(i8 align 4)
+; CHECK: define void @f17(i8 align 4)
+{
+        ret void;
+}
+
+define void @f18(i8* nocapture)
+; CHECK: define void @f18(i8* nocapture)
+{
+        ret void;
+}
+
+define void @f19() noredzone
+; CHECK: define void @f19() noredzone
+{
+        ret void;
+}
+
+define void @f20() noimplicitfloat
+; CHECK: define void @f20() noimplicitfloat
+{
+        ret void;
+}
+
+define void @f21() naked
+; CHECK: define void @f21() naked
+{
+        ret void;
+}
+
+define void @f22() inlinehint
+; CHECK: define void @f22() inlinehint
+{
+        ret void;
+}
+
+define void @f23() alignstack(4)
+; CHECK: define void @f23() alignstack(4)
+{
+        ret void;
+}
+
+define void @f24() returns_twice
+; CHECK: define void @f24() returns_twice
+{
+        ret void;
+}
+
+define void @f25() uwtable
+; CHECK: define void @f25() uwtable
+{
+        ret void;
+}
+
+define void @f26() nonlazybind
+; CHECK: define void @f26() nonlazybind
+{
+        ret void;
+}
+
+define void @f27() address_safety
+; CHECK: define void @f27() address_safety
+{
+        ret void;
+}
diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll
index b972753..8502b0d 100644
--- a/test/Bitcode/null-type.ll
+++ b/test/Bitcode/null-type.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-dis < %s.bc > /dev/null |& FileCheck %s
+; RUN: not llvm-dis < %s.bc > /dev/null 2> %t
+; RUN: FileCheck %s < %t
 ; PR8494
 
 ; CHECK: Invalid MODULE_CODE_FUNCTION record
diff --git a/test/Bitcode/ptest-new.ll b/test/Bitcode/ptest-new.ll
new file mode 100644
index 0000000..276fb7a
--- /dev/null
+++ b/test/Bitcode/ptest-new.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define i32 @foo(<2 x i64> %bar) nounwind {
+entry:
+; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
+ %res1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %bar, <2 x i64> %bar)
+; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
+ %res2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %bar, <2 x i64> %bar)
+; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
+ %res3 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %bar, <2 x i64> %bar)
+ %add1 = add i32 %res1, %res2
+ %add2 = add i32 %add1, %res2
+ ret i32 %add2
+}
+
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
new file mode 100644
index 0000000..fc6ed8e
--- /dev/null
+++ b/test/Bitcode/ptest-old.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define i32 @foo(<4 x float> %bar) nounwind {
+entry:
+; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
+ %res1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %bar, <4 x float> %bar)
+; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> 
+ %res2 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %bar, <4 x float> %bar)
+; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
+ %res3 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %bar, <4 x float> %bar)
+ %add1 = add i32 %res1, %res2
+ %add2 = add i32 %add1, %res2
+ ret i32 %add2
+}
+
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8cebb7c..991cc9d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,81 +1,30 @@
-foreach(c ${LLVM_TARGETS_TO_BUILD})
-  set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
-endforeach(c)
-set(TARGETS_TO_BUILD ${TARGETS_BUILT})
-
-# FIXME: This won't work for project files, we need to use a --param.
-set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}")
-set(SHLIBEXT "${LTDL_SHLIB_EXT}")
-
-set(SHLIBDIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}")
-
-if(BUILD_SHARED_LIBS)
-  set(LLVM_SHARED_LIBS_ENABLED "1")
-else()
-  set(LLVM_SHARED_LIBS_ENABLED "0")
-endif(BUILD_SHARED_LIBS)
-
-if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-  set(SHLIBPATH_VAR "DYLD_LIBRARY_PATH")
-else() # Default for all other unix like systems.
-  # CMake hardcodes the library locaction using rpath.
-  # Therefore LD_LIBRARY_PATH is not required to run binaries in the
-  # build dir. We pass it anyways.
-  set(SHLIBPATH_VAR "LD_LIBRARY_PATH")
-endif()
-
-set(LIT_ARGS "${LLVM_LIT_ARGS}")
-separate_arguments(LIT_ARGS)
-
-configure_file(
-  ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
-  ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
-
-MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
-
-# Configuration-time: See Unit/lit.site.cfg.in
-set(LLVM_BUILD_MODE "%(build_mode)s")
-
-set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
-set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
-set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
-set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
-set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
-set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
-
-if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
-  set(ENABLE_ASSERTIONS "1")
-else()
-  set(ENABLE_ASSERTIONS "0")
-endif()
-
-configure_file(
+configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-  @ONLY)
-configure_file(
+  )
+configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
   ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
-  @ONLY)
-
-add_custom_target(check
-  COMMAND ${PYTHON_EXECUTABLE}
-              ${LLVM_SOURCE_DIR}/utils/lit/lit.py
-              --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-              --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
-              --param build_config=${CMAKE_CFG_INTDIR}
-              --param build_mode=${RUNTIME_BUILD_MODE}
-              ${LIT_ARGS}
-              ${CMAKE_CURRENT_BINARY_DIR}
-              COMMENT "Running LLVM regression tests")
-
-add_custom_target(check.deps)
-add_dependencies(check check.deps)
-add_dependencies(check.deps
-              UnitTests
-              BugpointPasses LLVMHello
-              llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump
-              llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
-              macho-dump opt
-              FileCheck count not json-bench)
-set_target_properties(check.deps PROPERTIES FOLDER "Tests")
+  )
+
+add_lit_testsuite(check-llvm "Running the LLVM regression tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+         llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+  DEPENDS UnitTests
+          BugpointPasses LLVMHello
+          llc lli llvm-ar llvm-as
+          llvm-diff
+          llvm-dis llvm-extract llvm-dwarfdump
+          llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
+          macho-dump opt
+          FileCheck count not
+          yaml2obj
+  )
+set_target_properties(check-llvm PROPERTIES FOLDER "Tests")
+
+# Setup a legacy alias for 'check-llvm'. This will likely change to be an
+# alias for 'check-all' at some point in the future.
+add_custom_target(check)
+add_dependencies(check check-llvm)
+set_target_properties(check PROPERTIES FOLDER "Tests")
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 33f935e..a63cdd4 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
 ; RUN:   -mattr=+v6 | grep r9
 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
-; RUN:   -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
+; RUN:   -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
 ; | grep 35
 
 define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index b543c57..8d3337c 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep "add.*#0"
 
 define i32 @foo() {
 entry:
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index d2eb85d..670048b 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep "str.*\!"
 
 	%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
 	%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index fd2f462..3754db0 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast -optimize-regalloc=0
 ; PR1925
 
 	%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 44da8e7..5fbed0d 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast -optimize-regalloc=0
 ; PR1925
 
 	%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 3526722..7342f69 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep "swi 107"
 
 define i32 @_swilseek(i32) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
index 813bf3c..7d4cc6e 100644
--- a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast -verify-machineinstrs
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs
 target triple = "arm-pc-linux-gnu"
 
 ; This test case would accidentally use the same physreg for two virtregs
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 5ce600d..b21bb00 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
 ; Radar 10266272
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios4.0.0"
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index 872eca3..f1c85f1 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -60,8 +60,16 @@ for.end:                                          ; preds = %entry
   ret void
 }
 
+; Check that pseudo-expansion preserves <undef> flags.
+define void @foo3(i8* %p) nounwind ssp {
+entry:
+  tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4)
+  ret void
+}
+
 declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
 
 !0 = metadata !{metadata !"omnipotent char", metadata !1}
 !1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
new file mode 100644
index 0000000..b3a7e34
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 < %s
+
+; CodeGen SplitCriticalEdge() shouldn't try to break edge to a landing pad.
+; rdar://11300144
+
+%0 = type opaque
+%class.FunctionInterpreter.3.15.31 = type { %class.Parser.1.13.29, %class.Parser.1.13.29*, %struct.ParserVariable.2.14.30*, i32 }
+%class.Parser.1.13.29 = type { i32 (...)**, %class.Parser.1.13.29* }
+%struct.ParserVariable.2.14.30 = type opaque
+%struct.ParseErrorMsg.0.12.28 = type { i32, i32, i32 }
+
+@_ZTI13ParseErrorMsg = external hidden unnamed_addr constant { i8*, i8* }
+@"OBJC_IVAR_$_MUMathExpressionDoubleBased.mInterpreter" = external hidden global i32, section "__DATA, __objc_ivar", align 4
+@"\01L_OBJC_SELECTOR_REFERENCES_14" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp {
+entry:
+  %call = invoke double undef(%class.FunctionInterpreter.3.15.31* undef) optsize
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @_ZTI13ParseErrorMsg to i8*)
+  br i1 undef, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.ParseErrorMsg.0.12.28*)*)(i8* undef, i8* undef, %struct.ParseErrorMsg.0.12.28* undef) optsize
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; preds = %catch
+  br label %try.cont
+
+try.cont:                                         ; preds = %invoke.cont2, %entry
+  %value.0 = phi double [ 0x7FF8000000000000, %invoke.cont2 ], [ %call, %entry ]
+  ret double %value.0
+
+lpad1:                                            ; preds = %catch
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; preds = %lpad1, %lpad
+  resume { i8*, i32 } undef
+
+terminate.lpad:                                   ; preds = %lpad1
+  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+declare i32 @__gxx_personality_sj0(...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM/2012-05-29-TailDupBug.ll b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
new file mode 100644
index 0000000..1a57f04
--- /dev/null
+++ b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -verify-machineinstrs < %s
+
+; Teach taildup to update livein set to appease verifier.
+; rdar://11538365
+
+%struct.__CFString.2 = type opaque
+
+declare void @CFRelease(i8*)
+
+define hidden fastcc i32 @t() ssp {
+entry:
+  %mylocale.i.i = alloca [256 x i8], align 1
+  br i1 undef, label %return, label %CFStringIsHyphenationAvailableForLocale.exit
+
+CFStringIsHyphenationAvailableForLocale.exit:     ; preds = %entry
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %CFStringIsHyphenationAvailableForLocale.exit
+  br i1 undef, label %if.end8.thread.i, label %if.then.i
+
+if.then.i:                                        ; preds = %if.end
+  br i1 undef, label %if.end8.thread.i, label %if.end8.i
+
+if.end8.thread.i:                                 ; preds = %if.then.i, %if.end
+  unreachable
+
+if.end8.i:                                        ; preds = %if.then.i
+  br i1 undef, label %if.then11.i, label %__CFHyphenationPullTokenizer.exit
+
+if.then11.i:                                      ; preds = %if.end8.i
+  unreachable
+
+__CFHyphenationPullTokenizer.exit:                ; preds = %if.end8.i
+  br i1 undef, label %if.end68, label %if.then3
+
+if.then3:                                         ; preds = %__CFHyphenationPullTokenizer.exit
+  br i1 undef, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %if.then3
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %if.then3
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %cond.end
+  unreachable
+
+while.end:                                        ; preds = %cond.end
+  br i1 undef, label %if.end5.i, label %if.then.i16
+
+if.then.i16:                                      ; preds = %while.end
+  br i1 undef, label %if.then4.i, label %if.end5.i
+
+if.then4.i:                                       ; preds = %if.then.i16
+  br i1 false, label %cleanup.thread, label %if.end.i20
+
+if.end5.i:                                        ; preds = %if.then.i16, %while.end
+  unreachable
+
+if.end.i20:                                       ; preds = %if.then4.i
+  br label %for.body.i146.i
+
+for.body.i146.i:                                  ; preds = %for.body.i146.i, %if.end.i20
+  br i1 undef, label %if.end20.i, label %for.body.i146.i
+
+if.end20.i:                                       ; preds = %for.body.i146.i
+  br i1 undef, label %cleanup.thread, label %if.end23.i
+
+if.end23.i:                                       ; preds = %if.end20.i
+  br label %for.body.i94.i
+
+for.body.i94.i:                                   ; preds = %for.body.i94.i, %if.end23.i
+  br i1 undef, label %if.then28.i, label %for.body.i94.i
+
+if.then28.i:                                      ; preds = %for.body.i94.i
+  br i1 undef, label %cond.true.i26, label %land.lhs.true
+
+cond.true.i26:                                    ; preds = %if.then28.i
+  br label %land.lhs.true
+
+land.lhs.true:                                    ; preds = %cond.true.i26, %if.then28.i
+  br i1 false, label %cleanup.thread, label %if.end35
+
+if.end35:                                         ; preds = %land.lhs.true
+  br i1 undef, label %cleanup.thread, label %if.end45
+
+if.end45:                                         ; preds = %if.end35
+  br i1 undef, label %if.then50, label %if.end.i37
+
+if.end.i37:                                       ; preds = %if.end45
+  br label %if.then50
+
+if.then50:                                        ; preds = %if.end.i37, %if.end45
+  br i1 undef, label %__CFHyphenationGetHyphensForString.exit, label %if.end.i
+
+if.end.i:                                         ; preds = %if.then50
+  br i1 undef, label %cleanup.i, label %cond.true.i
+
+cond.true.i:                                      ; preds = %if.end.i
+  br i1 undef, label %for.cond16.preheader.i, label %for.cond57.preheader.i
+
+for.cond16.preheader.i:                           ; preds = %cond.true.i
+  %cmp1791.i = icmp sgt i32 undef, 1
+  br i1 %cmp1791.i, label %for.body18.i, label %for.cond57.preheader.i
+
+for.cond57.preheader.i:                           ; preds = %for.cond16.preheader.i, %cond.true.i
+  %sub69.i = add i32 undef, -2
+  br label %cleanup.i
+
+for.body18.i:                                     ; preds = %for.cond16.preheader.i
+  store i16 0, i16* undef, align 2
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %while.body.i, %for.body18.i
+  br label %while.body.i
+
+cleanup.i:                                        ; preds = %for.cond57.preheader.i, %if.end.i
+  br label %__CFHyphenationGetHyphensForString.exit
+
+__CFHyphenationGetHyphensForString.exit:          ; preds = %cleanup.i, %if.then50
+  %retval.1.i = phi i32 [ 0, %cleanup.i ], [ -1, %if.then50 ]
+  %phitmp = bitcast %struct.__CFString.2* null to i8*
+  br label %if.end68
+
+cleanup.thread:                                   ; preds = %if.end35, %land.lhs.true, %if.end20.i, %if.then4.i
+  call void @llvm.stackrestore(i8* null)
+  br label %return
+
+if.end68:                                         ; preds = %__CFHyphenationGetHyphensForString.exit, %__CFHyphenationPullTokenizer.exit
+  %hyphenCount.2 = phi i32 [ %retval.1.i, %__CFHyphenationGetHyphensForString.exit ], [ 0, %__CFHyphenationPullTokenizer.exit ]
+  %_token.1 = phi i8* [ %phitmp, %__CFHyphenationGetHyphensForString.exit ], [ undef, %__CFHyphenationPullTokenizer.exit ]
+  call void @CFRelease(i8* %_token.1)
+  br label %return
+
+return:                                           ; preds = %if.end68, %cleanup.thread, %CFStringIsHyphenationAvailableForLocale.exit, %entry
+  %retval.1 = phi i32 [ %hyphenCount.2, %if.end68 ], [ -1, %CFStringIsHyphenationAvailableForLocale.exit ], [ -1, %cleanup.thread ], [ -1, %entry ]
+  ret i32 %retval.1
+}
+
+declare void @llvm.stackrestore(i8*) nounwind
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
new file mode 100644
index 0000000..b05ec63
--- /dev/null
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -o /dev/null "-mtriple=thumbv7-apple-ios" -debug-only=post-RA-sched 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; Make sure that mayalias store-load dependencies have one cycle
+; latency regardless of whether they are barriers or not.
+
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+  store volatile i32 65540, i32* %p1, align 4, !tbaa !0
+  %0 = load volatile i32* %p2, align 4, !tbaa !0
+  ret i32 %0
+}
+
+define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+  store i32 65540, i32* %p1, align 4, !tbaa !0
+  %0 = load i32* %p2, align 4, !tbaa !0
+  ret i32 %0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
new file mode 100644
index 0000000..e4ad45b
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -0,0 +1,174 @@
+; RUN: llc < %s
+; PR13377
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+
+define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable {
+  br i1 undef, label %4, label %5
+
+; <label>:4                                       ; preds = %3
+  unreachable
+
+; <label>:5                                       ; preds = %3
+  br i1 undef, label %7, label %6
+
+; <label>:6                                       ; preds = %5
+  unreachable
+
+; <label>:7                                       ; preds = %5
+  br i1 undef, label %8, label %10
+
+; <label>:8                                       ; preds = %7
+  br i1 undef, label %9, label %10
+
+; <label>:9                                       ; preds = %8
+  br i1 undef, label %11, label %10
+
+; <label>:10                                      ; preds = %9, %8, %7
+  unreachable
+
+; <label>:11                                      ; preds = %9
+  br i1 undef, label %13, label %12
+
+; <label>:12                                      ; preds = %11
+  unreachable
+
+; <label>:13                                      ; preds = %11
+  br i1 undef, label %15, label %14
+
+; <label>:14                                      ; preds = %13
+  unreachable
+
+; <label>:15                                      ; preds = %13
+  br i1 undef, label %18, label %16
+
+; <label>:16                                      ; preds = %15
+  br i1 undef, label %17, label %18
+
+; <label>:17                                      ; preds = %16
+  unreachable
+
+; <label>:18                                      ; preds = %16, %15
+  br i1 undef, label %68, label %19
+
+; <label>:19                                      ; preds = %18
+  br label %20
+
+; <label>:20                                      ; preds = %20, %19
+  br i1 undef, label %21, label %20
+
+; <label>:21                                      ; preds = %20
+  br i1 undef, label %22, label %68
+
+; <label>:22                                      ; preds = %21
+  br i1 undef, label %23, label %24
+
+; <label>:23                                      ; preds = %22
+  unreachable
+
+; <label>:24                                      ; preds = %22
+  br i1 undef, label %26, label %25
+
+; <label>:25                                      ; preds = %24
+  unreachable
+
+; <label>:26                                      ; preds = %24
+  br i1 undef, label %28, label %27
+
+; <label>:27                                      ; preds = %26
+  unreachable
+
+; <label>:28                                      ; preds = %26
+  br i1 undef, label %29, label %30, !prof !0
+
+; <label>:29                                      ; preds = %28
+  br label %30
+
+; <label>:30                                      ; preds = %29, %28
+  br i1 undef, label %31, label %32, !prof !0
+
+; <label>:31                                      ; preds = %30
+  br label %32
+
+; <label>:32                                      ; preds = %31, %30
+  br i1 undef, label %34, label %33
+
+; <label>:33                                      ; preds = %32
+  unreachable
+
+; <label>:34                                      ; preds = %32
+  br i1 undef, label %35, label %36, !prof !0
+
+; <label>:35                                      ; preds = %34
+  br label %36
+
+; <label>:36                                      ; preds = %35, %34
+  br i1 undef, label %37, label %38, !prof !0
+
+; <label>:37                                      ; preds = %36
+  br label %38
+
+; <label>:38                                      ; preds = %37, %36
+  br i1 undef, label %39, label %67
+
+; <label>:39                                      ; preds = %38
+  br i1 undef, label %40, label %41
+
+; <label>:40                                      ; preds = %39
+  br i1 undef, label %64, label %41
+
+; <label>:41                                      ; preds = %40, %39
+  br i1 undef, label %64, label %42
+
+; <label>:42                                      ; preds = %41
+  %43 = fadd <4 x float> undef, undef
+  %44 = fadd <4 x float> undef, undef
+  %45 = fmul <4 x float> undef, undef
+  %46 = fmul <4 x float> %45, %43
+  %47 = fmul <4 x float> undef, %44
+  %48 = load <4 x float>* undef, align 8, !tbaa !1
+  %49 = bitcast <4 x float> %48 to <2 x i64>
+  %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
+  %51 = bitcast <1 x i64> %50 to <2 x float>
+  %52 = shufflevector <2 x float> %51, <2 x float> undef, <4 x i32> zeroinitializer
+  %53 = bitcast <4 x float> %52 to <2 x i64>
+  %54 = shufflevector <2 x i64> %53, <2 x i64> undef, <1 x i32> zeroinitializer
+  %55 = bitcast <1 x i64> %54 to <2 x float>
+  %56 = extractelement <2 x float> %55, i32 0
+  %57 = insertelement <4 x float> undef, float %56, i32 2
+  %58 = insertelement <4 x float> %57, float 1.000000e+00, i32 3
+  %59 = fsub <4 x float> %47, %58
+  %60 = fmul <4 x float> undef, undef
+  %61 = fmul <4 x float> %59, %60
+  %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01>
+  %63 = fadd <4 x float> %47, %62
+  store <4 x float> %46, <4 x float>* undef, align 8, !tbaa !1
+  call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
+  call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
+  store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1
+  unreachable
+
+; <label>:64                                      ; preds = %41, %40
+  br i1 undef, label %65, label %66
+
+; <label>:65                                      ; preds = %64
+  unreachable
+
+; <label>:66                                      ; preds = %64
+  unreachable
+
+; <label>:67                                      ; preds = %38
+  unreachable
+
+; <label>:68                                      ; preds = %21, %18
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(%0*, float)
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
new file mode 100644
index 0000000..bdcd1b6
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+; PR13111
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-none-linux-gnueabi"
+
+define void @test_hi_char8() noinline {
+entry:
+  %0 = load <4 x i8>* undef, align 1
+  store <4 x i8> %0, <4 x i8>* null, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
new file mode 100644
index 0000000..b55f1ca
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+
+@var_v2i8 = global <2 x i8> zeroinitializer
+@var_v4i8 = global <4 x i8> zeroinitializer
+
+@var_v2i16 = global <2 x i16> zeroinitializer
+@var_v4i16 = global <4 x i16> zeroinitializer
+
+@var_v2i32 = global <2 x i32> zeroinitializer
+@var_v4i32 = global <4 x i32> zeroinitializer
+
+@var_v2i64 = global <2 x i64> zeroinitializer
+
+define void @test_v2i8tov2i32() {
+; CHECK: test_v2i8tov2i32:
+
+  %i8val = load <2 x i8>* @var_v2i8
+
+  %i32val = sext <2 x i8> %i8val to <2 x i32>
+  store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+  ret void
+}
+
+define void @test_v2i8tov2i64() {
+; CHECK: test_v2i8tov2i64:
+
+  %i8val = load <2 x i8>* @var_v2i8
+
+  %i64val = sext <2 x i8> %i8val to <2 x i64>
+  store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}}
+
+;  %i64val = sext <2 x i8> %i8val to <2 x i64>
+;  store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+
+  ret void
+}
+
+define void @test_v4i8tov4i16() {
+; CHECK: test_v4i8tov4i16:
+
+  %i8val = load <4 x i8>* @var_v4i8
+
+  %i16val = sext <4 x i8> %i8val to <4 x i16>
+  store <4 x i16> %i16val, <4 x i16>* @var_v4i16
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl.s16
+
+  ret void
+; CHECK: bx lr
+}
+
+define void @test_v4i8tov4i32() {
+; CHECK: test_v4i8tov4i32:
+
+  %i8val = load <4 x i8>* @var_v4i8
+
+  %i16val = sext <4 x i8> %i8val to <4 x i32>
+  store <4 x i32> %i16val, <4 x i32>* @var_v4i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+  ret void
+}
+
+define void @test_v2i16tov2i32() {
+; CHECK: test_v2i16tov2i32:
+
+  %i16val = load <2 x i16>* @var_v2i16
+
+  %i32val = sext <2 x i16> %i16val to <2 x i32>
+  store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl
+
+  ret void
+; CHECK: bx lr
+}
+
+define void @test_v2i16tov2i64() {
+; CHECK: test_v2i16tov2i64:
+
+  %i16val = load <2 x i16>* @var_v2i16
+
+  %i64val = sext <2 x i16> %i16val to <2 x i64>
+  store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]]
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/2012-08-13-bfi.ll b/test/CodeGen/ARM/2012-08-13-bfi.ll
new file mode 100644
index 0000000..8263833
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-13-bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=thumb -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: foo
+; CHECK-NOT: bfi
+; CHECK: bx
+define i32 @foo(i8 zeroext %i) nounwind uwtable readnone ssp {
+  %1 = and i8 %i, 15
+  %2 = zext i8 %1 to i32
+  %3 = icmp ult i8 %1, 10
+  %4 = or i32 %2, 48
+  %5 = add nsw i32 %2, 55
+  %6 = select i1 %3, i32 %4, i32 %5
+  ret i32 %6
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 9ccff07..6da9089 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4
 
 define i32 @t1(i32 %a) {
 	%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 31c5007..d668334 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
-; RUN: grep { = } %t   | count 5
+; RUN: grep " = " %t   | count 5
 ; RUN: grep globl %t | count 4
 ; RUN: grep weak %t  | count 1
 
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 396de37..5e12d8e 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -57,3 +57,12 @@ store i64 %0, i64* @f3_var, align 4
 store i64 %1, i64* @f3_var, align 4
 ret void
 }
+
+define i64 @f4(i64* %val) nounwind {
+entry:
+  ;CHECK: f4
+  ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0]
+  ;CHECK: mov r0, [[REG1]]
+  %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind
+  ret i64 %0
+}
diff --git a/test/CodeGen/ARM/bicZext.ll b/test/CodeGen/ARM/bicZext.ll
new file mode 100644
index 0000000..cf4b7ba
--- /dev/null
+++ b/test/CodeGen/ARM/bicZext.ll
@@ -0,0 +1,19 @@
+; RUN: llc %s -o - | FileCheck %s
+; ModuleID = 'bic.c'
+target triple = "thumbv7-apple-ios3.0.0"
+
+define zeroext i16 @foo16(i16 zeroext %f) nounwind readnone optsize ssp {
+entry:
+  ; CHECK: .thumb_func	_foo16
+  ; CHECK: {{bic[^#]*#3}}
+  %and = and i16 %f, -4
+  ret i16 %and
+}
+
+define i32 @foo32(i32 %f) nounwind readnone optsize ssp {
+entry:
+  ; CHECK: .thumb_func	_foo32
+  ; CHECK: {{bic[^#]*#3}}
+  %and = and i32 %f, -4
+  ret i32 %and
+}
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index efe29d8..00b1688 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:   not grep {bx lr}
+; RUN:   not grep "bx lr"
 
 	%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
 @r = external global [14 x i32]		; <[14 x i32]*> [#uses=4]
diff --git a/test/CodeGen/ARM/cmn.ll b/test/CodeGen/ARM/cmn.ll
new file mode 100644
index 0000000..ef73165
--- /dev/null
+++ b/test/CodeGen/ARM/cmn.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple thumbv7-apple-ios | FileCheck %s
+; <rdar://problem/7569620>
+
+define i32 @compare_i_gt(i32 %a) {
+entry:
+; CHECK:     compare_i_gt
+; CHECK-NOT: mvn
+; CHECK:     cmn
+  %cmp = icmp sgt i32 %a, -78
+  %. = zext i1 %cmp to i32
+  ret i32 %.
+}
+
+define i32 @compare_r_eq(i32 %a, i32 %b) {
+entry:
+; CHECK: compare_r_eq
+; CHECK: cmn
+  %sub = sub nsw i32 0, %b
+  %cmp = icmp eq i32 %a, %sub
+  %. = zext i1 %cmp to i32
+  ret i32 %.
+}
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
new file mode 100644
index 0000000..fb0f4c6
--- /dev/null
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; CHECK: f
+; The vld2 and vst2 are not aligned wrt each other, the second Q loaded is the
+; first one stored.
+; The coalescer must find a super-register larger than QQ to eliminate the copy
+; setting up the vst2 data.
+; CHECK: vld2
+; CHECK-NOT: vorr
+; CHECK-NOT: vmov
+; CHECK: vst2
+define void @f(float* %p, i32 %c) nounwind ssp {
+entry:
+  %0 = bitcast float* %p to i8*
+  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+  %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+  %add.ptr = getelementptr inbounds float* %p, i32 8
+  %1 = bitcast float* %add.ptr to i8*
+  tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
+  ret void
+}
+
+; CHECK: f1
+; FIXME: This function still has copies.
+define void @f1(float* %p, i32 %c) nounwind ssp {
+entry:
+  %0 = bitcast float* %p to i8*
+  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+  %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+  %add.ptr = getelementptr inbounds float* %p, i32 8
+  %1 = bitcast float* %add.ptr to i8*
+  %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+  %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+  tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
+  ret void
+}
+
+; CHECK: f2
+; FIXME: This function still has copies.
+define void @f2(float* %p, i32 %c) nounwind ssp {
+entry:
+  %0 = bitcast float* %p to i8*
+  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+  %vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ]
+  %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ]
+  %p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
+  %add.ptr = getelementptr inbounds float* %p.addr.0, i32 8
+  %1 = bitcast float* %add.ptr to i8*
+  %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+  %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+  %vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1
+  tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
+  %dec = add nsw i32 %c.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll
index 8a865e2..a3d49f6 100644
--- a/test/CodeGen/ARM/crash-greedy.ll
+++ b/test/CodeGen/ARM/crash-greedy.ll
@@ -82,3 +82,49 @@ if.then195:                                       ; preds = %if.then84
 if.end251:                                        ; preds = %if.then195, %if.then84, %entry
   ret void
 }
+
+; Coalescer failure: removeCopyByCommutingDef leaves a bad kill flag
+; behind.
+define void @rdar11950722() nounwind readonly optsize ssp align 2 {
+entry:
+  br i1 undef, label %land.lhs.true7, label %lor.lhs.false.i
+
+lor.lhs.false.i:
+  br i1 undef, label %if.then10.i, label %land.lhs.true7
+
+if.then10.i:
+  %xFlags.1.i = select i1 undef, i32 0, i32 undef
+  br i1 undef, label %land.lhs.true33.i, label %f.exit
+
+land.lhs.true33.i:
+  %and26.i = and i32 %xFlags.1.i, 8
+  %cmp27.i = icmp eq i32 %and26.i, 0
+  %and29.i = and i32 %xFlags.1.i, 2147483645
+  %xFlags.1.and29.i = select i1 %cmp27.i, i32 %xFlags.1.i, i32 %and29.i
+  %and34.i = and i32 %xFlags.1.i, 8
+  %cmp35.i = icmp eq i32 %and34.i, 0
+  %and37.i = and i32 %xFlags.1.i, 2147483645
+  %yFlags.1.and37.i = select i1 %cmp35.i, i32 %xFlags.1.i, i32 %and37.i
+  br label %f.exit
+
+f.exit:
+  %xFlags.3.i = phi i32 [ %xFlags.1.and29.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ]
+  %yFlags.2.i = phi i32 [ %yFlags.1.and37.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ]
+  %cmp40.i = icmp eq i32 %xFlags.3.i, %yFlags.2.i
+  br i1 %cmp40.i, label %land.lhs.true7, label %land.end
+
+land.lhs.true7:
+  br i1 undef, label %land.lhs.true34, label %lor.lhs.false27
+
+lor.lhs.false27:
+  br i1 undef, label %land.lhs.true34, label %land.end
+
+land.lhs.true34:
+  br i1 undef, label %land.end, label %lor.lhs.false44
+
+lor.lhs.false44:
+  ret void
+
+land.end:
+  ret void
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 1d011be..62b9e43 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll
new file mode 100644
index 0000000..a66a9d1
--- /dev/null
+++ b/test/CodeGen/ARM/data-in-code-annotations.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+define double @f1() nounwind {
+; CHECK: f1:
+; CHECK: .data_region
+; CHECK: .long 1413754129
+; CHECK: .long 1074340347
+; CHECK: .end_data_region
+  ret double 0x400921FB54442D11
+}
+
+
+define i32 @f2()  {
+; CHECK: f2:
+; CHECK: .data_region jt32
+; CHECK: .end_data_region
+
+entry:
+  switch i32 undef, label %return [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb6
+    i32 3, label %sw.bb13
+    i32 4, label %sw.bb20
+  ]
+
+sw.bb:                                            ; preds = %entry
+  br label %return
+
+sw.bb6:                                           ; preds = %entry
+  br label %return
+
+sw.bb13:                                          ; preds = %entry
+  br label %return
+
+sw.bb20:                                          ; preds = %entry
+  %div = sdiv i32 undef, undef
+  br label %return
+
+return:                                           ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry
+  %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 9bdae43..4f4ff8e 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -3,16 +3,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: 	vadd.f32	q4, q8, q8
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT: 	@DEBUG_VALUE: y <- Q4+0
-;CHECK-NEXT:    @DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT: Ltmp1
+
+;CHECK:@DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
 
 
 @.str = external constant [13 x i8]
 
 declare <4 x float> @test0001(float) nounwind readnone ssp
 
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp {
 entry:
   br label %for.body9
 
@@ -21,7 +22,7 @@ for.body9:                                        ; preds = %for.body9, %entry
   tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
   %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
   tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
-  br i1 undef, label %for.end54, label %for.body9, !dbg !44
+  br i1 %cond, label %for.end54, label %for.body9, !dbg !44
 
 for.end54:                                        ; preds = %for.body9
   %tmp115 = extractelement <4 x float> %add19, i32 1
@@ -52,7 +53,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 49c4103..7fbf8f4 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s
 
 define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
 entry:
@@ -56,3 +56,17 @@ bb1:
 
 declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
 declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
+
+; rdar://11714607
+define i32 @howmany(i32 %x, i32 %y) nounwind {
+entry:
+; CHECK: howmany:
+; CHECK: bl ___udivmodsi4
+; CHECK-NOT: ___udivsi3
+  %rem = urem i32 %x, %y
+  %div = udiv i32 %x, %y
+  %not.cmp = icmp ne i32 %rem, 0
+  %add = zext i1 %not.cmp to i32
+  %cond = add i32 %add, %div
+  ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 45c322d..bcb4ee7 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
 entry:
         %dum = fadd float %a, %b
-	%0 = tail call float @fabsf(float %dum)
+	%0 = tail call float @fabsf(float %dum) readnone
         %dum1 = fadd float %0, %b
 	ret float %dum1
 }
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
new file mode 100644
index 0000000..14721a4
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Fast-isel can't handle non-double multi-reg retvals.
+; This test just check to make sure we don't hit the assert in FinishCall.
+define <16 x i8> @foo() nounwind ssp {
+entry:
+  ret <16 x i8> zeroinitializer
+}
+
+define void @t1() nounwind ssp {
+entry:
+; ARM: @t1
+; THUMB: @t1
+  %call = call <16 x i8> @foo()
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index dd460b2..edc805a 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
 
 define i32 @t0(i1 zeroext %a) nounwind {
   %1 = zext i1 %a to i32
@@ -99,6 +101,11 @@ entry:
 ; ARM: uxtb r9, r12
 ; ARM: str r9, [sp, #4]
 ; ARM: bl _bar
+; ARM-LONG: @t10
+; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; ARM-LONG: ldr lr, [lr]
+; ARM-LONG: blx lr
 ; THUMB: @t10
 ; THUMB: movs r0, #0
 ; THUMB: movt r0, #0
@@ -121,8 +128,96 @@ entry:
 ; THUMB: uxtb.w r9, r12
 ; THUMB: str.w r9, [sp, #4]
 ; THUMB: bl _bar
+; THUMB-LONG: @t10
+; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; THUMB-LONG: ldr.w lr, [lr]
+; THUMB-LONG: blx lr
   %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
   ret i32 0
 }
 
 declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
+
+define i32 @bar0(i32 %i) nounwind {
+  ret i32 0
+}
+
+define void @foo3() uwtable {
+; ARM: movw    r0, #0
+; ARM: movw    r1, :lower16:_bar0
+; ARM: movt    r1, :upper16:_bar0
+; ARM: blx     r1
+; THUMB: movs    r0, #0
+; THUMB: movw    r1, :lower16:_bar0
+; THUMB: movt    r1, :upper16:_bar0
+; THUMB: blx     r1
+  %fptr = alloca i32 (i32)*, align 8
+  store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
+  %1 = load i32 (i32)** %fptr, align 8
+  %call = call i32 %1(i32 0)
+  ret void
+}
+
+define i32 @LibCall(i32 %a, i32 %b) {
+entry:
+; ARM: LibCall
+; ARM: bl ___udivsi3
+; ARM-LONG: LibCall
+; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: ldr r2, [r2]
+; ARM-LONG: blx r2
+; THUMB: LibCall
+; THUMB: bl ___udivsi3
+; THUMB-LONG: LibCall
+; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: ldr r2, [r2]
+; THUMB-LONG: blx r2
+        %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @VarArg() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %k = alloca i32, align 4
+  %m = alloca i32, align 4
+  %n = alloca i32, align 4
+  %tmp = alloca i32, align 4
+  %0 = load i32* %i, align 4
+  %1 = load i32* %j, align 4
+  %2 = load i32* %k, align 4
+  %3 = load i32* %m, align 4
+  %4 = load i32* %n, align 4
+; ARM: VarArg
+; ARM: mov r7, sp
+; ARM: movw r0, #5
+; ARM: ldr r1, [r7, #-4]
+; ARM: ldr r2, [r7, #-8]
+; ARM: ldr r3, [r7, #-12]
+; ARM: ldr r9, [sp, #16]
+; ARM: ldr r12, [sp, #12]
+; ARM: str r9, [sp]
+; ARM: str r12, [sp, #4]
+; ARM: bl _CallVariadic
+; THUMB: mov r7, sp
+; THUMB: movs r0, #5
+; THUMB: movt r0, #0
+; THUMB: ldr r1, [sp, #28]
+; THUMB: ldr r2, [sp, #24]
+; THUMB: ldr r3, [sp, #20]
+; THUMB: ldr.w r9, [sp, #16]
+; THUMB: ldr.w r12, [sp, #12]
+; THUMB: str.w r9, [sp]
+; THUMB: str.w r12, [sp, #4]
+; THUMB: bl _CallVariadic
+  %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
+  store i32 %call, i32* %tmp, align 4
+  %5 = load i32* %tmp, align 4
+  ret i32 %5
+}
+
+declare i32 @CallVariadic(i32, ...)
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
new file mode 100644
index 0000000..8f7b294
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+
+define i8* @frameaddr_index0() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index0:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+
+; DARWIN-THUMB2: frameaddr_index0:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+
+; LINUX-ARM: frameaddr_index0:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+
+; LINUX-THUMB2: frameaddr_index0:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+}
+
+define i8* @frameaddr_index1() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index1:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index1:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index1:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index1:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+
+  %0 = call i8* @llvm.frameaddress(i32 1)
+  ret i8* %0
+}
+
+define i8* @frameaddr_index3() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index3:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index3:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index3:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index3:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+
+  %0 = call i8* @llvm.frameaddress(i32 3)
+  ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index e6bdfa7..b73fcef 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
 
 @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
 @temp = common global [60 x i8] zeroinitializer, align 1
@@ -13,6 +15,11 @@ define void @t1() nounwind ssp {
 ; ARM: movw r2, #10
 ; ARM: uxtb r1, r1
 ; ARM: bl _memset
+; ARM-LONG: t1
+; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
 ; THUMB: t1
 ; THUMB: movw r0, :lower16:_message1
 ; THUMB: movt r0, :upper16:_message1
@@ -23,6 +30,11 @@ define void @t1() nounwind ssp {
 ; THUMB: movt r2, #0
 ; THUMB: uxtb r1, r1
 ; THUMB: bl _memset
+; THUMB-LONG: t1
+; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
   call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
   ret void
 }
@@ -41,6 +53,11 @@ define void @t2() nounwind ssp {
 ; ARM: mov r0, r1
 ; ARM: ldr r1, [sp]                @ 4-byte Reload
 ; ARM: bl _memcpy
+; ARM-LONG: t2
+; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
 ; THUMB: t2
 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
 ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -51,6 +68,11 @@ define void @t2() nounwind ssp {
 ; THUMB: movt r2, #0
 ; THUMB: mov r0, r1
 ; THUMB: bl _memcpy
+; THUMB-LONG: t2
+; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
   ret void
 }
@@ -67,6 +89,11 @@ define void @t3() nounwind ssp {
 ; ARM: movw r2, #10
 ; ARM: mov r0, r1
 ; ARM: bl _memmove
+; ARM-LONG: t3
+; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
 ; THUMB: t3
 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
 ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -77,6 +104,11 @@ define void @t3() nounwind ssp {
 ; THUMB: movt r2, #0
 ; THUMB: mov r0, r1
 ; THUMB: bl _memmove
+; THUMB-LONG: t3
+; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
   call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
   ret void
 }
diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll
new file mode 100644
index 0000000..111818b
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-shifter.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+
+define i32 @shl() nounwind ssp {
+entry:
+; ARM: shl
+; ARM: lsl r0, r0, #2
+  %shl = shl i32 -1, 2
+  ret i32 %shl
+}
+
+define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: shl_reg
+; ARM: lsl r0, r0, r1
+  %shl = shl i32 %src1, %src2
+  ret i32 %shl
+}
+
+define i32 @lshr() nounwind ssp {
+entry:
+; ARM: lshr
+; ARM: lsr r0, r0, #2
+  %lshr = lshr i32 -1, 2
+  ret i32 %lshr
+}
+
+define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: lshr_reg
+; ARM: lsr r0, r0, r1
+  %lshr = lshr i32 %src1, %src2
+  ret i32 %lshr
+}
+
+define i32 @ashr() nounwind ssp {
+entry:
+; ARM: ashr
+; ARM: asr r0, r0, #2
+  %ashr = ashr i32 -1, 2
+  ret i32 %ashr
+}
+
+define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: ashr_reg
+; ARM: asr r0, r0, r1
+  %ashr = ashr i32 %src1, %src2
+  ret i32 %ashr
+}
+
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 417e2d9..ecd5fe2 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -226,3 +226,15 @@ define i32 @urem_fold(i32 %a) nounwind {
   %rem = urem i32 %a, 32
   ret i32 %rem
 }
+
+define i32 @test7() noreturn nounwind  {
+entry:
+; ARM: @test7
+; THUMB: @test7
+; ARM: trap
+; THUMB: trap
+  tail call void @llvm.trap( )
+  unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 27fa2b0..5511d24 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -11,7 +11,7 @@ entry:
 ; HARD: test1:
 ; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
 ; HARD: vbsl [[REG1]], d
-  %0 = tail call float @copysignf(float %x, float %y) nounwind
+  %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
   ret float %0
 }
 
@@ -25,7 +25,7 @@ entry:
 ; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
 ; HARD: vshl.i64 [[REG2]], [[REG2]], #32
 ; HARD: vbsl [[REG2]], d1, d0
-  %0 = tail call double @copysign(double %x, double %y) nounwind
+  %0 = tail call double @copysign(double %x, double %y) nounwind readnone
   ret double %0
 }
 
@@ -36,7 +36,7 @@ entry:
 ; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
 ; SOFT: vbsl [[REG3]],
   %0 = fmul double %x, %y
-  %1 = tail call double @copysign(double %0, double %z) nounwind
+  %1 = tail call double @copysign(double %0, double %z) nounwind readnone
   ret double %1
 }
 
diff --git a/test/CodeGen/ARM/floorf.ll b/test/CodeGen/ARM/floorf.ll
new file mode 100644
index 0000000..492fc36
--- /dev/null
+++ b/test/CodeGen/ARM/floorf.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=arm-unknown-unknown < %s | FileCheck %s
+
+; CHECK: test1
+define float @test1() nounwind uwtable readnone ssp {
+; CHECK-NOT: floorf
+  %foo = call float @floorf(float 0x4000CCCCC0000000) nounwind readnone
+  ret float %foo
+}
+
+; CHECK: test2
+define float @test2() nounwind uwtable readnone ssp {
+; CHECK-NOT: ceilf
+  %foo = call float @ceilf(float 0x4000CCCCC0000000) nounwind readnone
+  ret float %foo
+}
+
+; CHECK: test3
+define float @test3() nounwind uwtable readnone ssp {
+; CHECK-NOT: truncf
+  %foo = call float @truncf(float 0x4000CCCCC0000000) nounwind readnone
+  ret float %foo
+}
+
+declare float @floorf(float) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare float @truncf(float) nounwind readnone
+
+
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index bc118b8..3c3182b 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -21,3 +21,12 @@ entry:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
 ; CORTEXA9: 	vmul.f32	s{{.}}, s{{.}}, s{{.}}
+
+; VFP2: test2
+define float @test2(float %a) nounwind {
+; CHECK-NOT: mul
+; CHECK: mov pc, lr
+  %ret = fmul float %a, 1.0
+  ret float %ret
+}
+
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index ce6d6b2..40ea33b 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s
 
 define float @f1(float %a, float %b) {
 ;CHECK: f1:
@@ -84,7 +84,7 @@ define float @f11(float %a) {
 ;CHECK: f11:
 ;CHECK: bic
 entry:
-	%tmp1 = call float @fabsf( float %a )		; <float> [#uses=1]
+	%tmp1 = call float @fabsf( float %a ) readnone	; <float> [#uses=1]
 	ret float %tmp1
 }
 
@@ -94,7 +94,7 @@ define double @f12(double %a) {
 ;CHECK: f12:
 ;CHECK: vabs.f64
 entry:
-	%tmp1 = call double @fabs( double %a )		; <double> [#uses=1]
+	%tmp1 = call double @fabs( double %a ) readnone	; <double> [#uses=1]
 	ret double %tmp1
 }
 
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 802d1b8..303d165 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
 ; Check generated fused MAC and MLS.
 
 define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -138,8 +138,16 @@ entry:
 ; CHECK: vfms.f64
   %tmp1 = fsub double -0.0, %b
   %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
-  %tmp3 = fsub double -0.0, %tmp2
-  ret double %tmp3
+  ret double %tmp2
+}
+
+define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
+; CHECK: test_fnms_f32
+; CHECK: vfnms.f32
+  %tmp1 = load float* %c, align 4
+  %tmp2 = fsub float -0.0, %tmp1
+  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
+  ret float %tmp3 
 }
 
 define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -158,7 +166,8 @@ entry:
 ; CHECK: vfnms.f64
   %tmp1 = fsub double -0.0, %b
   %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
-  ret double %tmp2
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
 }
 
 define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -180,6 +189,36 @@ entry:
   ret double %tmp3
 }
 
+define float @test_fma_const_fold(float %a, float %b) nounwind {
+; CHECK: test_fma_const_fold
+; CHECK-NOT: vfma
+; CHECK-NOT: vmul
+; CHECK: vadd
+  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
+  ret float %ret
+}
+
+define float @test_fma_canonicalize(float %a, float %b) nounwind {
+; CHECK: test_fma_canonicalize
+; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
+; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
+  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
+  ret float %ret
+}
+
+; Check that very wide vector fma's can be split into legal fma's.
+define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp {
+; CHECK: test_fma_v8f32
+; CHECK: vfma.f32
+; CHECK: vfma.f32
+entry:
+  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
+  store <8 x float> %call, <8 x float>* %p, align 16
+  ret void
+}
+
+
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index 89e309d..600a8c2 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -10,7 +10,25 @@ define i32 @test(i32 %a) {
         %b = icmp sgt i32 %a, -1
         %abs = select i1 %b, i32 %a, i32 %tmp1neg
         ret i32 %abs
-; CHECK:  movs r0, r0
+; CHECK:  cmp
 ; CHECK:  rsbmi r0, r0, #0
 ; CHECK:  bx lr
 }
+
+; rdar://11633193
+;; 3 instructions will be generated for abs(a-b):
+;;   subs
+;;   rsbmi
+;;   bx
+define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: test2
+; CHECK: subs
+; CHECK-NEXT: rsbmi
+; CHECK-NEXT: bx
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, -1
+  %sub1 = sub nsw i32 0, %sub
+  %cond = select i1 %cmp, i32 %sub, i32 %sub1
+  ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 3f8fd75..73b546d 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3
 ; rdar://6949835
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
@@ -18,7 +18,6 @@ entry:
 
 ; M3: t:
 ; M3-NOT: ldrd
-; M3: ldm.w r2, {r2, r3}
 
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 8130019..0c8d387 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]"
 ; Should use scaled addressing mode.
 
 define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index 991d728..bbedea1 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=armv7-eabi      | FileCheck %s -check-prefix=EABI
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic            | FileCheck %s -check-prefix=IOS-PIC
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static         | FileCheck %s -check-prefix=IOS-STATIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-eabi      | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=pic            | FileCheck %s -check-prefix=IOS-PIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=static         | FileCheck %s -check-prefix=IOS-STATIC
 
 @foo = common global i32 0
 
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index de48fee..4a82c36 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
 
 define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
-;CHECK: vrecpe.f32
 ;CHECK: vmovn.i32
+;CHECK: vrecpe.f32
 ;CHECK: vmovn.i32
 ;CHECK: vmovn.i16
 	%tmp1 = load <8 x i8>* %A
@@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
 ;CHECK: vrecps.f32
+;CHECK: vmovn.i32
 ;CHECK: vrecpe.f32
 ;CHECK: vrecps.f32
 ;CHECK: vmovn.i32
-;CHECK: vmovn.i32
 ;CHECK: vqmovun.s16
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
index b4da552..df98e23 100644
--- a/test/CodeGen/ARM/opt-shuff-tstore.ll
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -mattr=+neon < %s | FileCheck %s
 
 ; CHECK: func_4_8
 ; CHECK: vst1.32
diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll
new file mode 100644
index 0000000..4bc8810
--- /dev/null
+++ b/test/CodeGen/ARM/pr13249.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple armv7--linux-gnueabi
+
+define arm_aapcscc i8* @__strtok_r_1c(i8* %arg, i8 signext %arg1, i8** nocapture %arg2) nounwind {
+bb:
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb
+  %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ]
+  %tmp4 = load i8* %tmp, align 1
+  %tmp5 = getelementptr inbounds i8* %tmp, i32 1
+  br i1 undef, label %bb3, label %bb7
+
+bb7:                                              ; preds = %bb13, %bb3
+  %tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ]
+  %tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ]
+  %tmp10 = icmp ne i8 %tmp8, %arg1
+  %tmp12 = getelementptr inbounds i8* %tmp9, i32 1
+  br i1 %tmp10, label %bb13, label %bb15
+
+bb13:                                             ; preds = %bb7
+  %tmp14 = load i8* %tmp12, align 1
+  br label %bb7
+
+bb15:                                             ; preds = %bb7
+  store i8* %tmp9, i8** %arg2, align 4
+  ret i8* %tmp
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 3e07da8..418d4f3 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -113,3 +113,29 @@ entry:
   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
   ret void
 }
+
+; CHECK: f10
+define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+  %1 = icmp eq i32 %a, %b
+  %2 = zext i1 %1 to i32
+  %3 = sitofp i32 %2 to float
+  ret float %3
+}
+
+; CHECK: f11
+define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+  %1 = icmp eq i32 %a, %b
+  %2 = sitofp i1 %1 to float
+  ret float %2
+}
+
+; CHECK: f12
+define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatunsisf
+  %1 = icmp eq i32 %a, %b
+  %2 = uitofp i1 %1 to float
+  ret float %2
+}
+
diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll
new file mode 100644
index 0000000..99df0d4
--- /dev/null
+++ b/test/CodeGen/ARM/smml.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+; CHECK-NOT: smmls
+  %conv4 = zext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %conv2 = sext i32 %c to i64
+  %mul = mul nsw i64 %conv2, %conv1
+  %shr5 = lshr i64 %mul, 32
+  %sub = sub nsw i64 %conv4, %shr5
+  %conv3 = trunc i64 %sub to i32
+  ret i32 %conv3
+}
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 983ba45..5ce2bce 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,13 +1,12 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
-
-; The greedy register allocator uses a single CSR here, invalidating the test.
+; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-; CHECK: push {lr}
-; CHECK: pop {lr}
+; CHECK: push {r4, r5, lr}
+; CHECK: pop {r4, r5, pc}
+        call void asm sideeffect "", "~{r4},~{r5}"() nounwind
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index e56e3f2..d8b3f0e 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm | \
-; RUN:   grep {str.*\\!} | count 2
+; RUN:   grep "str.*\!" | count 2
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
         %B = load i32* %A               ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
new file mode 100644
index 0000000..99ba475
--- /dev/null
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
+
+; rdar://9877866
+%struct.SmallStruct = type { i32, [8 x i32], [37 x i8] }
+%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
+
+define i32 @f() nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: ldr
+; CHECK: str
+; CHECK-NOT:bne
+  %st = alloca %struct.SmallStruct, align 4
+  %call = call i32 @e1(%struct.SmallStruct* byval %st)
+  ret i32 0
+}
+
+; Generate a loop for large struct byval
+define i32 @g() nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: ldr
+; CHECK: sub
+; CHECK: str
+; CHECK: bne
+  %st = alloca %struct.LargeStruct, align 4
+  %call = call i32 @e2(%struct.LargeStruct* byval %st)
+  ret i32 0
+}
+
+; Generate a loop using NEON instructions
+define i32 @h() nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: vld1
+; CHECK: sub
+; CHECK: vst1
+; CHECK: bne
+  %st = alloca %struct.LargeStruct, align 16
+  %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
+  ret i32 0
+}
+
+declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
+declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
+declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll
new file mode 100644
index 0000000..6fcbdee
--- /dev/null
+++ b/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: subs
+; CHECK-NOT: cmp
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %sub. = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %sub.
+}
+
+define i32 @g(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: subs
+; CHECK-NOT: cmp
+  %cmp = icmp slt i32 %a, %b
+  %sub = sub nsw i32 %b, %a
+  %sub. = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %sub.
+}
+
+define i32 @h(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: subs
+; CHECK-NOT: cmp
+  %cmp = icmp sgt i32 %a, 3
+  %sub = sub nsw i32 %a, 3
+  %sub. = select i1 %cmp, i32 %sub, i32 %b
+  ret i32 %sub.
+}
+
+; rdar://11725965
+define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: i:
+; CHECK: subs
+; CHECK-NOT: cmp
+  %cmp = icmp ult i32 %a, %b
+  %sub = sub i32 %b, %a
+  %sub. = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %sub.
+}
+; If CPSR is live-out, we can't remove cmp if there exists
+; a swapped sub.
+define i32 @j(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: j:
+; CHECK: sub
+; CHECK: cmp
+  %cmp = icmp eq i32 %b, %a
+  %sub = sub nsw i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %cmp2 = icmp sgt i32 %b, %a
+  %sel = select i1 %cmp2, i32 %sub, i32 %a
+  ret i32 %sel
+
+if.else:
+  ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 06ea703..474043a 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -36,3 +36,15 @@ entry:
   %sel = select i1 %cmp, i32 1, i32 %sub
   ret i32 %sel
 }
+
+; rdar://11726136
+define i32 @f5(i32 %x) {
+entry:
+; CHECK: f5
+; CHECK: movw r1, #65535
+; CHECK-NOT: movt
+; CHECK-NOT: add
+; CHECK: sub r0, r0, r1
+  %sub = add i32 %x, -65535
+  ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 3143387..c403fa5 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep {__aeabi_read_tp}
+; RUN:     grep "__aeabi_read_tp"
 
 define i8* @test() {
 entry:
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index 28fd469..a25352c 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -3,10 +3,10 @@
 
 define i32 @test(i32 %a, i32 %b) {
 entry:
-; CHECK:        movs.w
+; CHECK:        cmp
 ; CHECK-NEXT:   it    mi
 ; CHECK-NEXT:   rsbmi
-; CHECK-NEXT:   movs.w
+; CHECK-NEXT:   cmp
 ; CHECK-NEXT:   it    mi
 ; CHECK-NEXT:   rsbmi
  %cmp1 = icmp slt i32 %a, 0
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
new file mode 100644
index 0000000..a5f3c90
--- /dev/null
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -0,0 +1,117 @@
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s
+
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+  ret i32* @external_gd
+
+  ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+  ; CHECK-NONPIC:   f1:
+  ; CHECK-NONPIC:   external_gd(gottpoff)
+  ; CHECK-PIC:      f1:
+  ; CHECK-PIC:      external_gd(tlsgd)
+}
+
+define i32* @f2() {
+entry:
+  ret i32* @internal_gd
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic,
+  ; but that is not implemented, so falls back to general dynamic.
+  ; CHECK-NONPIC:   f2:
+  ; CHECK-NONPIC:   internal_gd(tpoff)
+  ; CHECK-PIC:      f2:
+  ; CHECK-PIC:      internal_gd(tlsgd)
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+  ret i32* @external_ld
+
+  ; Non-PIC code can use initial exec, PIC should use local dynamic,
+  ; but that is not implemented, so falls back to general dynamic.
+  ; CHECK-NONPIC:   f3:
+  ; CHECK-NONPIC:   external_ld(gottpoff)
+  ; CHECK-PIC:      f3:
+  ; CHECK-PIC:      external_ld(tlsgd)
+}
+
+define i32* @f4() {
+entry:
+  ret i32* @internal_ld
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic,
+  ; but that is not implemented, so it falls back to general dynamic.
+  ; CHECK-NONPIC:   f4:
+  ; CHECK-NONPIC:   internal_ld(tpoff)
+  ; CHECK-PIC:      f4:
+  ; CHECK-PIC:      internal_ld(tlsgd)
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+  ret i32* @external_ie
+
+  ; Non-PIC and PIC code will use initial exec as specified.
+  ; CHECK-NONPIC:   f5:
+  ; CHECK-NONPIC:   external_ie(gottpoff)
+  ; CHECK-PIC:      f5:
+  ; CHECK-PIC:      external_ie(gottpoff)
+}
+
+define i32* @f6() {
+entry:
+  ret i32* @internal_ie
+
+  ; Non-PIC code can use local exec, PIC code use initial exec as specified.
+  ; CHECK-NONPIC:   f6:
+  ; CHECK-NONPIC:   internal_ie(tpoff)
+  ; CHECK-PIC:      f6:
+  ; CHECK-PIC:      internal_ie(gottpoff)
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+  ret i32* @external_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; CHECK-NONPIC:   f7:
+  ; CHECK-NONPIC:   external_le(tpoff)
+  ; CHECK-PIC:      f7:
+  ; CHECK-PIC:      external_le(tpoff)
+}
+
+define i32* @f8() {
+entry:
+  ret i32* @internal_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; CHECK-NONPIC:   f8:
+  ; CHECK-NONPIC:   internal_le(tpoff)
+  ; CHECK-PIC:      f8:
+  ; CHECK-PIC:      internal_le(tpoff)
+}
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 1087094..ec4278c 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep {i(tpoff)}
+; RUN:     grep "i(tpoff)"
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep {__aeabi_read_tp}
+; RUN:     grep "__aeabi_read_tp"
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
-; RUN:     -relocation-model=pic | grep {__tls_get_addr}
+; RUN:     -relocation-model=pic | grep "__tls_get_addr"
 
 
 @i = thread_local global i32 15		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df7a4ca..e0e944f 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep {tbss}
+; RUN:     grep "tbss"
 
 %struct.anon = type { i32, i32 }
 @teste = internal thread_local global %struct.anon zeroinitializer		; <%struct.anon*> [#uses=1]
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
new file mode 100644
index 0000000..4e227dd
--- /dev/null
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -0,0 +1,21 @@
+; Tests for the two-address instruction pass.
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+
+define void @PR13378() nounwind {
+; This was orriginally a crasher trying to schedule the instructions.
+; CHECK:      PR13378:
+; CHECK:        vldmia
+; CHECK-NEXT:   vmov.f32
+; CHECK-NEXT:   vstmia
+; CHECK-NEXT:   vstmia
+; CHECK-NEXT:   vmov.f32
+; CHECK-NEXT:   vstmia
+
+entry:
+  %0 = load <4 x float>* undef
+  store <4 x float> zeroinitializer, <4 x float>* undef
+  store <4 x float> %0, <4 x float>* undef
+  %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
+  store <4 x float> %1, <4 x float>* undef
+  unreachable
+}
diff --git a/test/CodeGen/ARM/unsafe-fsub.ll b/test/CodeGen/ARM/unsafe-fsub.ll
new file mode 100644
index 0000000..3a4477d
--- /dev/null
+++ b/test/CodeGen/ARM/unsafe-fsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s
+; RUN: llc -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=FAST %s
+
+target triple = "armv7-apple-ios"
+
+; SAFE: test
+; FAST: test
+define float @test(float %x, float %y) {
+entry:
+; SAFE: vmul.f32
+; SAFE: vsub.f32
+; FAST: mov r0, #0
+  %0 = fmul float %x, %y
+  %1 = fsub float %0, %0
+  ret float %1
+}
+
+
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 450f90d..9f55c24 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,79 +1,80 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; NB: this tests vcnt, vclz, and vcls
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 ;CHECK: vcnt8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
-	%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
+	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
 
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 ;CHECK: vcntQ8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
-	%tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
+	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
 
-declare <8 x i8>  @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
 
 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
 ;CHECK: vclz8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
-	%tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 ;CHECK: vclz16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <4 x i16>* %A
-	%tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
 ;CHECK: vclz32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <2 x i32>* %A
-	%tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
 	ret <2 x i32> %tmp2
 }
 
 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
 ;CHECK: vclzQ8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
-	%tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
 ;CHECK: vclzQ16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <8 x i16>* %A
-	%tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
 ;CHECK: vclzQ32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <4 x i32>* %A
-	%tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
 	ret <4 x i32> %tmp2
 }
 
-declare <8 x i8>  @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
 
-declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
 
 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
 ;CHECK: vclss8:
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
index 1ec36da..8fd3db2 100644
--- a/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) {
 
 ; CHECK: g:
 define float @g(<4 x i8>* nocapture %in) {
-  ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+  ; CHECK: vld1
   ; CHECK: vmovl.u8
   ; CHECK: vmovl.u16
   %1 = load <4 x i8>* %in
@@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) {
 
 ; CHECK: i:
 define <4 x i8> @i(<4 x i8>* %x) {
-  ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+  ; CHECK: vld1
   ; CHECK: vmovl.s8
   ; CHECK: vmovl.s16
   ; CHECK: vrecpe
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 49a6982..7a4b34f 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
 
 define void @test(float* %P, double* %D) {
 	%A = load float* %P		; <float> [#uses=1]
@@ -17,11 +17,11 @@ define void @test_abs(float* %P, double* %D) {
 ;CHECK: test_abs:
 	%a = load float* %P		; <float> [#uses=1]
 ;CHECK: vabs.f32
-	%b = call float @fabsf( float %a )		; <float> [#uses=1]
+	%b = call float @fabsf( float %a ) readnone	; <float> [#uses=1]
 	store float %b, float* %P
 	%A = load double* %D		; <double> [#uses=1]
 ;CHECK: vabs.f64
-	%B = call double @fabs( double %A )		; <double> [#uses=1]
+	%B = call double @fabs( double %A ) readnone	; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
 }
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 61d73c1..c69473f 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -75,12 +75,12 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind {
         ret <8 x i8> %tmp5
 }
 
-define <4 x i16> @vld2dupi16(i16* %A) nounwind {
+define <4 x i16> @vld2dupi16(i8* %A) nounwind {
 ;CHECK: vld2dupi16:
 ;Check that a power-of-two alignment smaller than the total size of the memory
 ;being loaded is ignored.
 ;CHECK: vld2.16 {d16[], d17[]}, [r0]
-	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
 	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -94,7 +94,8 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 ;CHECK: vld2dupi16_update:
 ;CHECK: vld2.16 {d16[], d17[]}, [r1]!
 	%A = load i16** %ptr
-	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+        %A2 = bitcast i16* %A to i8*
+	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
 	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -105,11 +106,11 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 	ret <4 x i16> %tmp5
 }
 
-define <2 x i32> @vld2dupi32(i32* %A) nounwind {
+define <2 x i32> @vld2dupi32(i8* %A) nounwind {
 ;CHECK: vld2dupi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
-	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
 	%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
@@ -119,8 +120,8 @@ define <2 x i32> @vld2dupi32(i32* %A) nounwind {
 }
 
 declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
 
 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -144,11 +145,11 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
 	ret <8 x i8> %tmp8
 }
 
-define <4 x i16> @vld3dupi16(i16* %A) nounwind {
+define <4 x i16> @vld3dupi16(i8* %A) nounwind {
 ;CHECK: vld3dupi16:
 ;Check the (default) alignment value. VLD3 does not support alignment.
 ;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
-	%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+	%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
 	%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
@@ -161,7 +162,7 @@ define <4 x i16> @vld3dupi16(i16* %A) nounwind {
 }
 
 declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
 
 %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
 %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
@@ -171,7 +172,8 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
 ;CHECK: vld4dupi16_update:
 ;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
 	%A = load i16** %ptr
-	%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+        %A2 = bitcast i16* %A to i8*
+	%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
 	%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
@@ -188,12 +190,12 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
 	ret <4 x i16> %tmp11
 }
 
-define <2 x i32> @vld4dupi32(i32* %A) nounwind {
+define <2 x i32> @vld4dupi32(i8* %A) nounwind {
 ;CHECK: vld4dupi32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
 ;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
-	%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+	%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
 	%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
 	%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
@@ -208,5 +210,5 @@ define <2 x i32> @vld4dupi32(i32* %A) nounwind {
         ret <2 x i32> %tmp11
 }
 
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 61d89bb..74628f0 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
   %3 = extractelement <8 x i16> %2, i32 0
   ret i16 %3
 }
+
+; A constant build_vector created for a vmull with half-width elements must
+; not introduce illegal types. <rdar://problem/11324364>
+define void @vmull_buildvector() nounwind optsize ssp align 2 {
+; CHECK: vmull_buildvector
+entry:
+  br i1 undef, label %for.end179, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.cond.loopexit:                                ; preds = %for.body33, %for.body
+  br i1 undef, label %for.end179, label %for.body
+
+for.body:                                         ; preds = %for.cond.loopexit, %for.body.lr.ph
+  br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph
+
+for.body33.lr.ph:                                 ; preds = %for.body
+  %.sub = select i1 undef, i32 0, i32 undef
+  br label %for.body33
+
+for.body33:                                       ; preds = %for.body33, %for.body33.lr.ph
+  %add45 = add i32 undef, undef
+  %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
+  %0 = load i32** undef, align 4
+  %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
+  %vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
+  %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1>
+  %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8>
+  %3 = bitcast <16 x i8> undef to <2 x i64>
+  %vmovl.i237 = zext <8 x i8> undef to <8 x i16>
+  %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %vmovl.i225 = zext <8 x i8> undef to <8 x i16>
+  %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249
+  %vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind
+  %mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>
+  %vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130
+  %sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134
+  %add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96>
+  %add.i198 = add <8 x i16> %add.i200, %sub.i203
+  %mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237
+  %mul.i191 = mul <8 x i16> %vshr_n130, undef
+  %add.i192 = add <8 x i16> %mul.i191, %mul.i194
+  %mul.i187 = mul <8 x i16> %vshr_n134, undef
+  %add.i188 = add <8 x i16> %mul.i187, %add.i192
+  %mul.i185 = mul <8 x i16> undef, undef
+  %add.i186 = add <8 x i16> %mul.i185, undef
+  %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+  %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+  %mul.i184 = mul <8 x i16> undef, %vrshr_n160
+  %mul.i181 = mul <8 x i16> undef, %vmovl.i225
+  %add.i182 = add <8 x i16> %mul.i181, %mul.i184
+  %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>)
+  %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind
+  %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %5 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  store <16 x i8> %5, <16 x i8>* undef, align 16
+  %add177 = add nsw i32 undef, 16
+  br i1 undef, label %for.body33, label %for.cond.loopexit
+
+for.end179:                                       ; preds = %for.cond.loopexit, %entry
+  ret void
+}
+
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index e3372a0..f117ab2 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst3i8:
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 71fea12..16e9798 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -5,3 +5,4 @@ define void @foo() {
   ret void
 }
 
+
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index c14fd7b..f6b028d 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llc --march=cellspu %s -o - | FileCheck %s
+; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s
 
 ; Exercise the floating point comparison operators for f32:
 
@@ -15,8 +15,8 @@ define i1 @fcmp_eq(float %arg1, float %arg2) {
 define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
 ; CHECK: fcmeq
 ; CHECK: bi $lr
-        %1 = call float @fabsf(float %arg1)
-        %2 = call float @fabsf(float %arg2)
+        %1 = call float @fabsf(float %arg1) readnone
+        %2 = call float @fabsf(float %arg2) readnone
         %3 = fcmp oeq float %1, %2
         ret i1 %3
 }
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 1e5e3b3..6e01906 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -32,11 +32,11 @@ declare double @fabs(double)
 declare float @fabsf(float)
 
 define double @fabs_dp(double %X) {
-        %Y = call double @fabs( double %X )
+        %Y = call double @fabs( double %X ) readnone
         ret double %Y
 }
 
 define float @fabs_sp(float %X) {
-        %Y = call float @fabsf( float %X )
+        %Y = call float @fabsf( float %X ) readnone
         ret float %Y
 }
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
index 32b1261..2f9b091 100644
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -1,14 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilh                                %t1.s | count 15
-; RUN: grep ceqh                               %t1.s | count 29
-; RUN: grep ceqhi                              %t1.s | count 13
-; RUN: grep clgth                              %t1.s | count 15
-; RUN: grep cgth                               %t1.s | count 14
-; RUN: grep cgthi                              %t1.s | count 6
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 17
-; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -27,6 +17,10 @@ target triple = "spu"
 
 ; i16 integer comparisons:
 define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_select_i16:
+; CHECK:        ceqh
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp eq i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -34,12 +28,22 @@ entry:
 }
 
 define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_setcc_i16:
+; CHECK:        ilhu
+; CHECK:        ceqh
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp eq i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_immed01_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i16 %arg1, 511
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -47,6 +51,10 @@ entry:
 }
 
 define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_immed02_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i16 %arg1, -512
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -54,6 +62,10 @@ entry:
 }
 
 define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_immed03_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i16 %arg1, -1
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -61,6 +73,11 @@ entry:
 }
 
 define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_eq_immed04_i16:
+; CHECK:        ilh
+; CHECK:        ceqh
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i16 %arg1, 32768
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -68,6 +85,10 @@ entry:
 }
 
 define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_select_i16:
+; CHECK:        ceqh
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ne i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -75,12 +96,23 @@ entry:
 }
 
 define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_setcc_i16:
+; CHECK:        ceqh
+; CHECK:        ilhu
+; CHECK:        xorhi
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ne i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_immed01_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i16 %arg1, 511
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -88,6 +120,10 @@ entry:
 }
 
 define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_immed02_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i16 %arg1, -512
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -95,6 +131,10 @@ entry:
 }
 
 define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_immed03_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i16 %arg1, -1
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -102,6 +142,11 @@ entry:
 }
 
 define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ne_immed04_i16:
+; CHECK:        ilh
+; CHECK:        ceqh
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i16 %arg1, 32768
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -109,6 +154,10 @@ entry:
 }
 
 define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_select_i16:
+; CHECK:        clgth
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ugt i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -116,12 +165,22 @@ entry:
 }
 
 define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_setcc_i16:
+; CHECK:        ilhu
+; CHECK:        clgth
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ugt i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_immed01_i16:
+; CHECK:        clgthi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i16 %arg1, 500
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -129,6 +188,10 @@ entry:
 }
 
 define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_immed02_i16:
+; CHECK:        ceqhi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ugt i16 %arg1, 0
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -136,6 +199,10 @@ entry:
 }
 
 define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_immed03_i16:
+; CHECK:        clgthi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i16 %arg1, 65024
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -143,6 +210,11 @@ entry:
 }
 
 define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ugt_immed04_i16:
+; CHECK:        ilh
+; CHECK:        clgth
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i16 %arg1, 32768
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -150,6 +222,12 @@ entry:
 }
 
 define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_uge_select_i16:
+; CHECK:        ceqh
+; CHECK:        clgth
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp uge i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -157,6 +235,14 @@ entry:
 }
 
 define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_uge_setcc_i16:
+; CHECK:        ceqh
+; CHECK:        clgth
+; CHECK:        ilhu
+; CHECK:        or
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp uge i16 %arg1, %arg2
        ret i1 %A
@@ -169,6 +255,12 @@ entry:
 ;; they'll ever be generated.
 
 define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_select_i16:
+; CHECK:        ceqh
+; CHECK:        clgth
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ult i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -176,12 +268,26 @@ entry:
 }
 
 define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_setcc_i16:
+; CHECK:        ceqh
+; CHECK:        clgth
+; CHECK:        ilhu
+; CHECK:        nor
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ult i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_immed01_i16:
+; CHECK:        ceqhi
+; CHECK:        clgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i16 %arg1, 511
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -189,6 +295,12 @@ entry:
 }
 
 define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_immed02_i16:
+; CHECK:        ceqhi
+; CHECK:        clgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i16 %arg1, 65534
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -196,6 +308,12 @@ entry:
 }
 
 define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_immed03_i16:
+; CHECK:        ceqhi
+; CHECK:        clgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i16 %arg1, 65024
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -203,6 +321,13 @@ entry:
 }
 
 define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ult_immed04_i16:
+; CHECK:        ilh
+; CHECK:        ceqh
+; CHECK:        clgth
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i16 %arg1, 32769
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -210,6 +335,10 @@ entry:
 }
 
 define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ule_select_i16:
+; CHECK:        clgth
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ule i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -217,6 +346,13 @@ entry:
 }
 
 define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_ule_setcc_i16:
+; CHECK:        clgth
+; CHECK:        ilhu
+; CHECK:        xorhi
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ule i16 %arg1, %arg2
        ret i1 %A
@@ -229,6 +365,10 @@ entry:
 ;; they'll ever be generated.
 
 define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_select_i16:
+; CHECK:        cgth
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sgt i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -236,12 +376,22 @@ entry:
 }
 
 define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_setcc_i16:
+; CHECK:        ilhu
+; CHECK:        cgth
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp sgt i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_immed01_i16:
+; CHECK:        cgthi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i16 %arg1, 511
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -249,6 +399,10 @@ entry:
 }
 
 define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_immed02_i16:
+; CHECK:        cgthi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i16 %arg1, -1
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -256,6 +410,10 @@ entry:
 }
 
 define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_immed03_i16:
+; CHECK:        cgthi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i16 %arg1, -512
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -263,6 +421,11 @@ entry:
 }
 
 define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sgt_immed04_i16:
+; CHECK:        ilh
+; CHECK:        ceqh
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp sgt i16 %arg1, 32768
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -270,6 +433,12 @@ entry:
 }
 
 define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sge_select_i16:
+; CHECK:        ceqh
+; CHECK:        cgth
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sge i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -277,6 +446,14 @@ entry:
 }
 
 define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sge_setcc_i16:
+; CHECK:        ceqh
+; CHECK:        cgth
+; CHECK:        ilhu
+; CHECK:        or
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp sge i16 %arg1, %arg2
        ret i1 %A
@@ -289,6 +466,12 @@ entry:
 ;; they'll ever be generated.
 
 define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_select_i16:
+; CHECK:        ceqh
+; CHECK:        cgth
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp slt i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -296,12 +479,26 @@ entry:
 }
 
 define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_setcc_i16:
+; CHECK:        ceqh
+; CHECK:        cgth
+; CHECK:        ilhu
+; CHECK:        nor
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp slt i16 %arg1, %arg2
        ret i1 %A
 }
 
 define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_immed01_i16:
+; CHECK:        ceqhi
+; CHECK:        cgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i16 %arg1, 511
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -309,6 +506,12 @@ entry:
 }
 
 define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_immed02_i16:
+; CHECK:        ceqhi
+; CHECK:        cgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i16 %arg1, -512
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -316,6 +519,12 @@ entry:
 }
 
 define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_immed03_i16:
+; CHECK:        ceqhi
+; CHECK:        cgthi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i16 %arg1, -1
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -323,6 +532,10 @@ entry:
 }
 
 define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_slt_immed04_i16:
+; CHECK:        lr
+; CHECK-NETX:   bi
+
 entry:
        %A = icmp slt i16 %arg1, 32768
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -330,6 +543,10 @@ entry:
 }
 
 define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sle_select_i16:
+; CHECK:        cgth
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp sle i16 %arg1, %arg2
        %B = select i1 %A, i16 %val1, i16 %val2
@@ -337,6 +554,13 @@ entry:
 }
 
 define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK:      icmp_sle_setcc_i16:
+; CHECK:        cgth
+; CHECK:        ilhu
+; CHECK:        xorhi
+; CHECK:        iohl
+; CHECK-NETX:   bi
+
 entry:
        %A = icmp sle i16 %arg1, %arg2
        ret i1 %A
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
index ccbb5f7..1794f4c 100644
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -1,14 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ila                                %t1.s | count 6
-; RUN: grep ceq                                %t1.s | count 28
-; RUN: grep ceqi                               %t1.s | count 12
-; RUN: grep clgt                               %t1.s | count 16
-; RUN: grep clgti                              %t1.s | count 6
-; RUN: grep cgt                                %t1.s | count 16
-; RUN: grep cgti                               %t1.s | count 6
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -27,6 +17,10 @@ target triple = "spu"
 
 ; i32 integer comparisons:
 define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_select_i32:
+; CHECK:        ceq
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp eq i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -34,12 +28,22 @@ entry:
 }
 
 define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_setcc_i32:
+; CHECK:        ilhu
+; CHECK:        ceq
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp eq i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_immed01_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -47,6 +51,10 @@ entry:
 }
 
 define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_immed02_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i32 %arg1, -512
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -54,6 +62,10 @@ entry:
 }
 
 define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_immed03_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i32 %arg1, -1
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -61,6 +73,11 @@ entry:
 }
 
 define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_eq_immed04_i32:
+; CHECK:        ila
+; CHECK:        ceq
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -68,6 +85,10 @@ entry:
 }
 
 define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_select_i32:
+; CHECK:        ceq
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ne i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -75,12 +96,23 @@ entry:
 }
 
 define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_setcc_i32:
+; CHECK:        ceq
+; CHECK:        ilhu
+; CHECK:        xori
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ne i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_immed01_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -88,6 +120,10 @@ entry:
 }
 
 define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_immed02_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i32 %arg1, -512
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -95,6 +131,10 @@ entry:
 }
 
 define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_immed03_i32:
+; CHECK:        ceqi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i32 %arg1, -1
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -102,6 +142,11 @@ entry:
 }
 
 define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ne_immed04_i32:
+; CHECK:        ila
+; CHECK:        ceq
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -109,6 +154,10 @@ entry:
 }
 
 define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_select_i32:
+; CHECK:        clgt
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ugt i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -116,12 +165,22 @@ entry:
 }
 
 define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_setcc_i32:
+; CHECK:        ilhu
+; CHECK:        clgt
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ugt i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_immed01_i32:
+; CHECK:        clgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -129,6 +188,10 @@ entry:
 }
 
 define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_immed02_i32:
+; CHECK:        clgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i32 %arg1, 4294966784
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -136,6 +199,10 @@ entry:
 }
 
 define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_immed03_i32:
+; CHECK:        clgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i32 %arg1, 4294967293
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -143,6 +210,11 @@ entry:
 }
 
 define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ugt_immed04_i32:
+; CHECK:        ila
+; CHECK:        clgt
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -150,6 +222,12 @@ entry:
 }
 
 define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_uge_select_i32:
+; CHECK:        ceq
+; CHECK:        clgt
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp uge i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -157,6 +235,14 @@ entry:
 }
 
 define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_uge_setcc_i32:
+; CHECK:        ceq
+; CHECK:        clgt
+; CHECK:        ilhu
+; CHECK:        or
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp uge i32 %arg1, %arg2
        ret i1 %A
@@ -169,6 +255,12 @@ entry:
 ;; they'll ever be generated.
 
 define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_select_i32:
+; CHECK:        ceq
+; CHECK:        clgt
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ult i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -176,12 +268,26 @@ entry:
 }
 
 define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_setcc_i32:
+; CHECK:        ceq
+; CHECK:        clgt
+; CHECK:        ilhu
+; CHECK:        nor
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ult i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_immed01_i32:
+; CHECK:        ceqi
+; CHECK:        clgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -189,6 +295,12 @@ entry:
 }
 
 define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_immed02_i32:
+; CHECK:        ceqi
+; CHECK:        clgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i32 %arg1, 4294966784
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -196,6 +308,12 @@ entry:
 }
 
 define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_immed03_i32:
+; CHECK:        ceqi
+; CHECK:        clgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i32 %arg1, 4294967293
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -203,6 +321,11 @@ entry:
 }
 
 define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ult_immed04_i32:
+; CHECK:        rotmi
+; CHECK:        ceqi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -210,6 +333,10 @@ entry:
 }
 
 define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ule_select_i32:
+; CHECK:        clgt
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ule i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -217,6 +344,13 @@ entry:
 }
 
 define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_ule_setcc_i32:
+; CHECK:        clgt
+; CHECK:        ilhu
+; CHECK:        xori
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp ule i32 %arg1, %arg2
        ret i1 %A
@@ -229,6 +363,10 @@ entry:
 ;; they'll ever be generated.
 
 define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_select_i32:
+; CHECK:        cgt
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sgt i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -236,12 +374,22 @@ entry:
 }
 
 define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_setcc_i32:
+; CHECK:        ilhu
+; CHECK:        cgt
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp sgt i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_immed01_i32:
+; CHECK:        cgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -249,6 +397,10 @@ entry:
 }
 
 define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_immed02_i32:
+; CHECK:        cgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i32 %arg1, 4294966784
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -256,6 +408,10 @@ entry:
 }
 
 define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_immed03_i32:
+; CHECK:        cgti
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i32 %arg1, 4294967293
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -263,6 +419,11 @@ entry:
 }
 
 define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sgt_immed04_i32:
+; CHECK:        ila
+; CHECK:        cgt
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -270,6 +431,12 @@ entry:
 }
 
 define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sge_select_i32:
+; CHECK:        ceq
+; CHECK:        cgt
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sge i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -277,6 +444,14 @@ entry:
 }
 
 define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sge_setcc_i32:
+; CHECK:        ceq
+; CHECK:        cgt
+; CHECK:        ilhu
+; CHECK:        or
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp sge i32 %arg1, %arg2
        ret i1 %A
@@ -289,6 +464,12 @@ entry:
 ;; they'll ever be generated.
 
 define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_select_i32:
+; CHECK:        ceq
+; CHECK:        cgt
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp slt i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -296,12 +477,26 @@ entry:
 }
 
 define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_setcc_i32:
+; CHECK:        ceq
+; CHECK:        cgt
+; CHECK:        ilhu
+; CHECK:        nor
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp slt i32 %arg1, %arg2
        ret i1 %A
 }
 
 define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_immed01_i32:
+; CHECK:        ceqi
+; CHECK:        cgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i32 %arg1, 511
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -309,6 +504,12 @@ entry:
 }
 
 define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_immed02_i32:
+; CHECK:        ceqi
+; CHECK:        cgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i32 %arg1, -512
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -316,6 +517,12 @@ entry:
 }
 
 define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_immed03_i32:
+; CHECK:        ceqi
+; CHECK:        cgti
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i32 %arg1, -1
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -323,6 +530,13 @@ entry:
 }
 
 define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_slt_immed04_i32:
+; CHECK:        ila
+; CHECK:        ceq
+; CHECK:        cgt
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i32 %arg1, 32768
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -330,6 +544,10 @@ entry:
 }
 
 define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sle_select_i32:
+; CHECK:        cgt
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp sle i32 %arg1, %arg2
        %B = select i1 %A, i32 %val1, i32 %val2
@@ -337,6 +555,13 @@ entry:
 }
 
 define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK:      icmp_sle_setcc_i32:
+; CHECK:        cgt
+; CHECK:        ilhu
+; CHECK:        xori
+; CHECK:        iohl
+; CHECK:        shufb
+
 entry:
        %A = icmp sle i32 %arg1, %arg2
        ret i1 %A
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
index 5517d10..1db641e 100644
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -1,13 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceqb                               %t1.s | count 24
-; RUN: grep ceqbi                              %t1.s | count 12
-; RUN: grep clgtb                              %t1.s | count 11
-; RUN: grep cgtb                               %t1.s | count 13
-; RUN: grep cgtbi                              %t1.s | count 5
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11
-; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -26,6 +17,10 @@ target triple = "spu"
 
 ; i8 integer comparisons:
 define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_eq_select_i8:
+; CHECK:        ceqb
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp eq i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -33,12 +28,20 @@ entry:
 }
 
 define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_eq_setcc_i8:
+; CHECK:        ceqb
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp eq i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_eq_immed01_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i8 %arg1, 127
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -46,6 +49,10 @@ entry:
 }
 
 define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_eq_immed02_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i8 %arg1, -128
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -53,6 +60,10 @@ entry:
 }
 
 define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_eq_immed03_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp eq i8 %arg1, -1
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -60,6 +71,10 @@ entry:
 }
 
 define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ne_select_i8:
+; CHECK:        ceqb
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ne i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -67,12 +82,21 @@ entry:
 }
 
 define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ne_setcc_i8:
+; CHECK:        ceqb
+; CHECK:        xorbi
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp ne i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ne_immed01_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i8 %arg1, 127
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -80,6 +104,10 @@ entry:
 }
 
 define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ne_immed02_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i8 %arg1, -128
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -87,6 +115,10 @@ entry:
 }
 
 define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ne_immed03_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp ne i8 %arg1, -1
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -94,6 +126,10 @@ entry:
 }
 
 define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ugt_select_i8:
+; CHECK:        clgtb
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ugt i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -101,12 +137,20 @@ entry:
 }
 
 define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ugt_setcc_i8:
+; CHECK:        clgtb
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp ugt i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ugt_immed01_i8:
+; CHECK:        clgtbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ugt i8 %arg1, 126
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -114,6 +158,12 @@ entry:
 }
 
 define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_uge_select_i8:
+; CHECK:        ceqb
+; CHECK:        clgtb
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp uge i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -121,6 +171,12 @@ entry:
 }
 
 define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_uge_setcc_i8:
+; CHECK:        ceqb
+; CHECK:        clgtb
+; CHECK:        or
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp uge i8 %arg1, %arg2
        ret i1 %A
@@ -133,6 +189,12 @@ entry:
 ;; they'll ever be generated.
 
 define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ult_select_i8:
+; CHECK:        ceqb
+; CHECK:        clgtb
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp ult i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -140,12 +202,24 @@ entry:
 }
 
 define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ult_setcc_i8:
+; CHECK:        ceqb
+; CHECK:        clgtb
+; CHECK:        nor
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp ult i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ult_immed01_i8:
+; CHECK:        ceqbi
+; CHECK:        clgtbi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i8 %arg1, 253
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -153,6 +227,12 @@ entry:
 }
 
 define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ult_immed02_i8:
+; CHECK:        ceqbi
+; CHECK:        clgtbi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp ult i8 %arg1, 129
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -160,6 +240,10 @@ entry:
 }
 
 define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ule_select_i8:
+; CHECK:        clgtb
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp ule i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -167,6 +251,11 @@ entry:
 }
 
 define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_ule_setcc_i8:
+; CHECK:        clgtb
+; CHECK:        xorbi
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp ule i8 %arg1, %arg2
        ret i1 %A
@@ -179,6 +268,10 @@ entry:
 ;; they'll ever be generated.
 
 define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sgt_select_i8:
+; CHECK:        cgtb
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sgt i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -186,12 +279,20 @@ entry:
 }
 
 define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sgt_setcc_i8:
+; CHECK:        cgtb
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp sgt i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sgt_immed01_i8:
+; CHECK:        cgtbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i8 %arg1, 96
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -199,6 +300,10 @@ entry:
 }
 
 define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sgt_immed02_i8:
+; CHECK:        cgtbi
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp sgt i8 %arg1, -1
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -206,6 +311,10 @@ entry:
 }
 
 define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sgt_immed03_i8:
+; CHECK:        ceqbi
+; CHECK:        selb $3, $4, $5, $3
+
 entry:
        %A = icmp sgt i8 %arg1, -128
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -213,6 +322,12 @@ entry:
 }
 
 define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sge_select_i8:
+; CHECK:        ceqb
+; CHECK:        cgtb
+; CHECK:        or
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp sge i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -220,6 +335,12 @@ entry:
 }
 
 define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sge_setcc_i8:
+; CHECK:        ceqb
+; CHECK:        cgtb
+; CHECK:        or
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp sge i8 %arg1, %arg2
        ret i1 %A
@@ -232,6 +353,12 @@ entry:
 ;; they'll ever be generated.
 
 define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_slt_select_i8:
+; CHECK:        ceqb
+; CHECK:        cgtb
+; CHECK:        nor
+; CHECK:        selb $3, $6, $5, $3
+
 entry:
        %A = icmp slt i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -239,12 +366,24 @@ entry:
 }
 
 define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_slt_setcc_i8:
+; CHECK:        ceqb
+; CHECK:        cgtb
+; CHECK:        nor
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp slt i8 %arg1, %arg2
        ret i1 %A
 }
 
 define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_slt_immed01_i8:
+; CHECK:        ceqbi
+; CHECK:        cgtbi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i8 %arg1, 96
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -252,6 +391,12 @@ entry:
 }
 
 define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_slt_immed02_i8:
+; CHECK:        ceqbi
+; CHECK:        cgtbi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i8 %arg1, -120
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -259,6 +404,12 @@ entry:
 }
 
 define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_slt_immed03_i8:
+; CHECK:        ceqbi
+; CHECK:        cgtbi
+; CHECK:        nor
+; CHECK:        selb $3, $5, $4, $3
+
 entry:
        %A = icmp slt i8 %arg1, -1
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -266,6 +417,10 @@ entry:
 }
 
 define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sle_select_i8:
+; CHECK:        cgtb
+; CHECK:        selb $3, $5, $6, $3
+
 entry:
        %A = icmp sle i8 %arg1, %arg2
        %B = select i1 %A, i8 %val1, i8 %val2
@@ -273,6 +428,11 @@ entry:
 }
 
 define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK:      icmp_sle_setcc_i8:
+; CHECK:        cgtb
+; CHECK:        xorbi
+; CHECK-NEXT:   bi
+
 entry:
        %A = icmp sle i8 %arg1, %arg2
        ret i1 %A
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index f4aad44..1ccc356 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,20 +1,20 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep {shlh	}  %t1.s | count 10
-; RUN: grep {shlhi	}  %t1.s | count 3
-; RUN: grep {shl	}  %t1.s | count 10
-; RUN: grep {shli	}  %t1.s | count 3
-; RUN: grep {xshw	}  %t1.s | count 5
-; RUN: grep {and	}  %t1.s | count 15
-; RUN: grep {andi	}  %t1.s | count 4
-; RUN: grep {rotmi	}  %t1.s | count 4
-; RUN: grep {rotqmbyi	}  %t1.s | count 1
-; RUN: grep {rotqmbii	}  %t1.s | count 2
-; RUN: grep {rotqmby	}  %t1.s | count 1
-; RUN: grep {rotqmbi	}  %t1.s | count 2
-; RUN: grep {rotqbyi	}  %t1.s | count 1
-; RUN: grep {rotqbii	}  %t1.s | count 2
-; RUN: grep {rotqbybi	}  %t1.s | count 1
-; RUN: grep {sfi	}  %t1.s | count 6
+; RUN: grep "shlh	"  %t1.s | count 10
+; RUN: grep "shlhi	"  %t1.s | count 3
+; RUN: grep "shl	"  %t1.s | count 10
+; RUN: grep "shli	"  %t1.s | count 3
+; RUN: grep "xshw	"  %t1.s | count 5
+; RUN: grep "and	"  %t1.s | count 15
+; RUN: grep "andi	"  %t1.s | count 4
+; RUN: grep "rotmi	"  %t1.s | count 4
+; RUN: grep "rotqmbyi	"  %t1.s | count 1
+; RUN: grep "rotqmbii	"  %t1.s | count 2
+; RUN: grep "rotqmby	"  %t1.s | count 1
+; RUN: grep "rotqmbi	"  %t1.s | count 2
+; RUN: grep "rotqbyi	"  %t1.s | count 1
+; RUN: grep "rotqbii	"  %t1.s | count 2
+; RUN: grep "rotqbybi	"  %t1.s | count 1
+; RUN: grep "sfi	"  %t1.s | count 6
 ; RUN: cat %t1.s | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 6ca5b08..43f8776 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep {stqd.*0(\$3)}      %t1.s | count 4
-; RUN: grep {stqd.*16(\$3)}     %t1.s | count 4
+; RUN: grep 'stqd.*0($3)'       %t1.s | count 4
+; RUN: grep 'stqd.*16($3)'      %t1.s | count 4
 ; RUN: grep 16256               %t1.s | count 2
 ; RUN: grep 16384               %t1.s | count 1
 ; RUN: grep 771                 %t1.s | count 4
@@ -8,7 +8,7 @@
 ; RUN: grep 1799                %t1.s | count 2
 ; RUN: grep 1543                %t1.s | count 5
 ; RUN: grep 1029                %t1.s | count 3
-; RUN: grep {shli.*, 4}         %t1.s | count 4
+; RUN: grep 'shli.*, 4'         %t1.s | count 4
 ; RUN: grep stqx                %t1.s | count 4
 ; RUN: grep ilhu                %t1.s | count 11
 ; RUN: grep iohl                %t1.s | count 8
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index d161852..e4c8fb4 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,19 +1,19 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep shufb   %t1.s | count 19
-; RUN: grep {ilhu.*1799}  %t1.s | count 1
-; RUN: grep {ilhu.*771}  %t1.s | count 2
-; RUN: grep {ilhu.*1543}  %t1.s | count 1
-; RUN: grep {ilhu.*1029}  %t1.s | count 1
-; RUN: grep {ilhu.*515}  %t1.s | count 1
-; RUN: grep {ilhu.*3855}  %t1.s | count 1
-; RUN: grep {ilhu.*3599}  %t1.s | count 1
-; RUN: grep {ilhu.*3085}  %t1.s | count 1
-; RUN: grep {iohl.*3855}  %t1.s | count 1
-; RUN: grep {iohl.*3599}  %t1.s | count 2
-; RUN: grep {iohl.*1543}  %t1.s | count 2
-; RUN: grep {iohl.*771}  %t1.s | count 2
-; RUN: grep {iohl.*515}  %t1.s | count 1
-; RUN: grep {iohl.*1799}  %t1.s | count 1
+; RUN: grep "ilhu.*1799"  %t1.s | count 1
+; RUN: grep "ilhu.*771"  %t1.s | count 2
+; RUN: grep "ilhu.*1543"  %t1.s | count 1
+; RUN: grep "ilhu.*1029"  %t1.s | count 1
+; RUN: grep "ilhu.*515"  %t1.s | count 1
+; RUN: grep "ilhu.*3855"  %t1.s | count 1
+; RUN: grep "ilhu.*3599"  %t1.s | count 1
+; RUN: grep "ilhu.*3085"  %t1.s | count 1
+; RUN: grep "iohl.*3855"  %t1.s | count 1
+; RUN: grep "iohl.*3599"  %t1.s | count 2
+; RUN: grep "iohl.*1543"  %t1.s | count 2
+; RUN: grep "iohl.*771"  %t1.s | count 2
+; RUN: grep "iohl.*515"  %t1.s | count 1
+; RUN: grep "iohl.*1799"  %t1.s | count 1
 ; RUN: grep lqa  %t1.s | count 1
 ; RUN: grep cbd  %t1.s | count 4
 ; RUN: grep chd  %t1.s | count 3
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index 928edc4..2dc5c16 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0
 	
 %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
 @search = external global %struct.CHESS_POSITION		; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
deleted file mode 100644
index ad418f7..0000000
--- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s
-; PR4317
-
-declare i32 @b()
-
-define void @a() {
-entry:
-  ret void
-
-dummy:
-  invoke i32 @b() to label %reg unwind label %reg
-
-reg:
-  %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-            catch i8* null
-  ret void
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2012-06-08-APIntCrash.ll b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
new file mode 100644
index 0000000..2c096bf
--- /dev/null
+++ b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s
+
+define void @test1(<8 x i32>* %ptr)
+{
+	%1 = load <8 x i32>* %ptr, align 32
+	%2 = and <8 x i32> %1, <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1>
+	store <8 x i32> %2, <8 x i32>* %ptr, align 16
+	ret void
+}
diff --git a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
new file mode 100644
index 0000000..6591c64
--- /dev/null
+++ b/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
@@ -0,0 +1,8 @@
+; RUN: llc -mcpu=corei7 < %s
+; We don't care about the output, just that it doesn't crash
+
+define <1 x i1> @buildvec_promote() {
+  %cmp = icmp ule <1 x i32> undef, undef
+  %sel = select i1 undef, <1 x i1> undef, <1 x i1> %cmp
+  ret <1 x i1> %sel
+}
diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll
index 605665b..891bbc9 100644
--- a/test/CodeGen/Generic/asm-large-immediate.ll
+++ b/test/CodeGen/Generic/asm-large-immediate.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s | grep 68719476738
+; RUN: llc < %s | FileCheck %s
 
 define void @test() {
 entry:
+; CHECK: /* result: 68719476738 */
         tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+; CHECK: /* result: -68719476738 */
+        tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
         ret void
 }
-
diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll
new file mode 100644
index 0000000..d6ba138
--- /dev/null
+++ b/test/CodeGen/Generic/donothing.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare void @llvm.donothing() readnone
+
+; CHECK: f1
+define void @f1() nounwind uwtable ssp {
+entry:
+; CHECK-NOT donothing
+  invoke void @llvm.donothing()
+  to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret void
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %1 = extractvalue { i8*, i32 } %0, 0
+  tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+  unreachable
+}
+
+; CHECK: f2
+define void @f2() nounwind {
+entry:
+; CHECK-NOT donothing
+  call void @llvm.donothing()
+  ret void
+}
diff --git a/test/CodeGen/Generic/edge-bundles-blockIDs.ll b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
index b4ae415..d86c758 100644
--- a/test/CodeGen/Generic/edge-bundles-blockIDs.ll
+++ b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
@@ -1,6 +1,6 @@
 ; Make sure EdgeBoundles handles the case when the function size is less then 
 ; the number of block IDs.
-; RUN: llc -regalloc=fast < %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 < %s
 
 define void @foo() nounwind {
 entry:
diff --git a/test/CodeGen/Generic/print-after.ll b/test/CodeGen/Generic/print-after.ll
new file mode 100644
index 0000000..7505907
--- /dev/null
+++ b/test/CodeGen/Generic/print-after.ll
@@ -0,0 +1,6 @@
+; RUN: not llc --help-hidden 2>&1 | FileCheck %s
+
+; CHECK: -print-after
+; CHECK-NOT: -print-after-all
+; CHECK: =simple-register-coalescing
+; CHECK: -print-after-all
diff --git a/test/CodeGen/Generic/print-machineinstrs.ll b/test/CodeGen/Generic/print-machineinstrs.ll
new file mode 100644
index 0000000..75dceb5
--- /dev/null
+++ b/test/CodeGen/Generic/print-machineinstrs.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs=branch-folder -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs= -o /dev/null 2>&1 | FileCheck %s
+
+define i64 @foo(i64 %a, i64 %b) nounwind {
+; CHECK: -branch-folder -print-machineinstrs
+; CHECK: Control Flow Optimizer
+; CHECK-NEXT: MachineFunction Printer
+; CHECK: Machine code for function foo:
+  %c = add i64 %a, %b
+  %d = trunc i64 %c to i32
+  %e = zext i32 %d to i64
+  ret i64 %e
+}
diff --git a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll
new file mode 100644
index 0000000..557e097
--- /dev/null
+++ b/test/CodeGen/Generic/stop-after.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -debug-pass=Structure -stop-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP
+; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START
+
+; STOP: -loop-reduce -print-module
+; STOP: Loop Strength Reduction
+; STOP-NEXT: Machine Function Analysis
+
+; START: -machine-branch-prob -gc-lowering
+; START: FunctionPass Manager
+; START-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Generic/undef-phi.ll b/test/CodeGen/Generic/undef-phi.ll
new file mode 100644
index 0000000..10899f9
--- /dev/null
+++ b/test/CodeGen/Generic/undef-phi.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -verify-machineinstrs -verify-coalescing
+;
+; This function has a PHI with one undefined input. Verify that PHIElimination
+; inserts an IMPLICIT_DEF instruction in the predecessor so all paths to the use
+; pass through a def.
+
+%struct.xx_stack = type { i32, %struct.xx_stack* }
+
+define i32 @push(%struct.xx_stack* %stack) nounwind uwtable readonly ssp {
+entry:
+  %tobool1 = icmp eq %struct.xx_stack* %stack, null
+  br i1 %tobool1, label %for.end, label %for.body
+
+for.body:
+  %stack.addr.02 = phi %struct.xx_stack* [ %0, %for.body ], [ %stack, %entry ]
+  %next = getelementptr inbounds %struct.xx_stack* %stack.addr.02, i64 0, i32 1
+  %0 = load %struct.xx_stack** %next, align 8
+  %tobool = icmp eq %struct.xx_stack* %0, null
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:
+  %top.0.lcssa = phi %struct.xx_stack* [ undef, %entry ], [ %stack.addr.02, %for.body ]
+  %first = getelementptr inbounds %struct.xx_stack* %top.0.lcssa, i64 0, i32 0
+  %1 = load i32* %first, align 4
+  ret i32 %1
+}
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index 69002e0..e9ac8b6 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = #7
 ; CHECK: memw(r29 + #0) = r[[T0]]
 ; CHECK: r0 = #1
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 36abd74..7219985 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/Hexagon/convertdptoint.ll b/test/CodeGen/Hexagon/convertdptoint.ll
new file mode 100644
index 0000000..fa068c4
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertdptoint.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 32-bit int value in IEEE complaint mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_df2w(r{{[0-9]+}}:{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store i32 0, i32* %retval
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %a, align 8
+  %1 = load double* %b, align 8
+  %add = fadd double %0, %1
+  store double %add, double* %c, align 8
+  %2 = load double* %c, align 8
+  %conv = fptosi double %2 to i32
+  store i32 %conv, i32* %i, align 4
+  %3 = load i32* %i, align 4
+  ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/convertdptoll.ll b/test/CodeGen/Hexagon/convertdptoll.ll
new file mode 100644
index 0000000..1b4dd86
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertdptoll.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 64-bit integer value in IEEE complaint mode in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = convert_df2d(r{{[0-9]+}}:{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i64, align 8
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store i32 0, i32* %retval
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %a, align 8
+  %1 = load double* %b, align 8
+  %add = fadd double %0, %1
+  store double %add, double* %c, align 8
+  %2 = load double* %c, align 8
+  %conv = fptosi double %2 to i64
+  store i64 %conv, i64* %i, align 8
+  %3 = load i64* %i, align 8
+  %conv1 = trunc i64 %3 to i32
+  ret i32 %conv1
+}
diff --git a/test/CodeGen/Hexagon/convertsptoint.ll b/test/CodeGen/Hexagon/convertsptoint.ll
new file mode 100644
index 0000000..b8a9d6c
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertsptoint.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate conversion from single precision floating point
+; to 32-bit int value in IEEE complaint mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_sf2w(r{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %a = alloca float, align 4
+  %b = alloca float, align 4
+  %c = alloca float, align 4
+  store i32 0, i32* %retval
+  store float 0x402ECCCCC0000000, float* %a, align 4
+  store float 0x4022333340000000, float* %b, align 4
+  %0 = load float* %a, align 4
+  %1 = load float* %b, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %c, align 4
+  %2 = load float* %c, align 4
+  %conv = fptosi float %2 to i32
+  store i32 %conv, i32* %i, align 4
+  %3 = load i32* %i, align 4
+  ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/convertsptoll.ll b/test/CodeGen/Hexagon/convertsptoll.ll
new file mode 100644
index 0000000..1c4df94
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertsptoll.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate conversion from single precision floating point
+; to 64-bit int value in IEEE complaint mode in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = convert_sf2d(r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i64, align 8
+  %a = alloca float, align 4
+  %b = alloca float, align 4
+  %c = alloca float, align 4
+  store i32 0, i32* %retval
+  store float 0x402ECCCCC0000000, float* %a, align 4
+  store float 0x4022333340000000, float* %b, align 4
+  %0 = load float* %a, align 4
+  %1 = load float* %b, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %c, align 4
+  %2 = load float* %c, align 4
+  %conv = fptosi float %2 to i64
+  store i64 %conv, i64* %i, align 8
+  %3 = load i64* %i, align 8
+  %conv1 = trunc i64 %3 to i32
+  ret i32 %conv1
+}
diff --git a/test/CodeGen/Hexagon/dadd.ll b/test/CodeGen/Hexagon/dadd.ll
new file mode 100644
index 0000000..602978a
--- /dev/null
+++ b/test/CodeGen/Hexagon/dadd.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate double precision floating point add in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfadd(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %a, align 8
+  %1 = load double* %b, align 8
+  %add = fadd double %0, %1
+  store double %add, double* %c, align 8
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/dmul.ll b/test/CodeGen/Hexagon/dmul.ll
new file mode 100644
index 0000000..d743773
--- /dev/null
+++ b/test/CodeGen/Hexagon/dmul.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate double precision floating point multiply in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfmpy(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %b, align 8
+  %1 = load double* %a, align 8
+  %mul = fmul double %0, %1
+  store double %mul, double* %c, align 8
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index 04c2ec1..c3b6f37 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_adddf3
 ; CHECK: __hexagon_subdf3
 
diff --git a/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
new file mode 100644
index 0000000..54e7ce3
--- /dev/null
+++ b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-hexagon-ieee-rnd-near < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 32-bit int value in IEEE rounding to the nearest mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_df2w(r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store i32 0, i32* %retval
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %a, align 8
+  %1 = load double* %b, align 8
+  %add = fadd double %0, %1
+  store double %add, double* %c, align 8
+  %2 = load double* %c, align 8
+  %conv = fptosi double %2 to i32
+  store i32 %conv, i32* %i, align 4
+  %3 = load i32* %i, align 4
+  ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/dsub.ll b/test/CodeGen/Hexagon/dsub.ll
new file mode 100644
index 0000000..4f9d39e
--- /dev/null
+++ b/test/CodeGen/Hexagon/dsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate double precision floating point subtract in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfsub(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca double, align 8
+  %b = alloca double, align 8
+  %c = alloca double, align 8
+  store double 1.540000e+01, double* %a, align 8
+  store double 9.100000e+00, double* %b, align 8
+  %0 = load double* %b, align 8
+  %1 = load double* %a, align 8
+  %sub = fsub double %0, %1
+  store double %sub, double* %c, align 8
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
new file mode 100644
index 0000000..9b27dda
--- /dev/null
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate dual stores in one packet in V4
+
+; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK-NEXT: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK-NEXT: }
+
+@Reg = global i32 0, align 4
+define i32 @main() nounwind {
+entry:
+  %number= alloca i32, align 4
+  store i32 500000, i32* %number, align 4
+  %number1= alloca i32, align 4
+  store i32 100000, i32* %number1, align 4
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/fadd.ll b/test/CodeGen/Hexagon/fadd.ll
new file mode 100644
index 0000000..b95e147
--- /dev/null
+++ b/test/CodeGen/Hexagon/fadd.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate sp floating point add in V5.
+
+; CHECK: r{{[0-9]+}} = sfadd(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca float, align 4
+  %b = alloca float, align 4
+  %c = alloca float, align 4
+  store float 0x402ECCCCC0000000, float* %a, align 4
+  store float 0x4022333340000000, float* %b, align 4
+  %0 = load float* %a, align 4
+  %1 = load float* %b, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %c, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/fcmp.ll b/test/CodeGen/Hexagon/fcmp.ll
new file mode 100644
index 0000000..e7b649e
--- /dev/null
+++ b/test/CodeGen/Hexagon/fcmp.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate floating point compare in V5
+
+; CHECK: p{{[0-2]+}} = sfcmp.{{.}}
+
+define i32 @foo(float %y) nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %y.addr = alloca float, align 4
+  store float %y, float* %y.addr, align 4
+  %0 = load float* %y.addr, align 4
+  %cmp = fcmp ogt float %0, 0x406AD7EFA0000000
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+if.else:                                          ; preds = %entry
+  store i32 2, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %1 = load i32* %retval
+  ret i32 %1
+}
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %a = alloca float, align 4
+  store i32 0, i32* %retval
+  store float 0x40012E0A00000000, float* %a, align 4
+  %0 = load float* %a, align 4
+  %call = call i32 @foo(float %0)
+  ret i32 %call
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index 51acf2e..bec9f58 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_addsf3
 ; CHECK: __hexagon_subsf3
 
diff --git a/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
new file mode 100644
index 0000000..bec9f58
--- /dev/null
+++ b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+  %acc.addr = alloca float*, align 4
+  %num.addr = alloca float, align 4
+  %num2.addr = alloca float, align 4
+  store float* %acc, float** %acc.addr, align 4
+  store float %num, float* %num.addr, align 4
+  store float %num2, float* %num2.addr, align 4
+  %0 = load float** %acc.addr, align 4
+  %1 = load float* %0
+  %2 = load float* %num.addr, align 4
+  %add = fadd float %1, %2
+  %3 = load float* %num2.addr, align 4
+  %sub = fsub float %add, %3
+  %4 = load float** %acc.addr, align 4
+  store float %sub, float* %4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/fmul.ll b/test/CodeGen/Hexagon/fmul.ll
new file mode 100644
index 0000000..4766845
--- /dev/null
+++ b/test/CodeGen/Hexagon/fmul.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate single precision floating point multiply in V5.
+
+; CHECK: r{{[0-9]+}} = sfmpy(r{{[0-9]+}}, r{{[0-9]+}})
+
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca float, align 4
+  %b = alloca float, align 4
+  %c = alloca float, align 4
+  store float 0x402ECCCCC0000000, float* %a, align 4
+  store float 0x4022333340000000, float* %b, align 4
+  %0 = load float* %b, align 4
+  %1 = load float* %a, align 4
+  %mul = fmul float %0, %1
+  store float %mul, float* %c, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index c0a9fda..dc87c73 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Hexagon/fsub.ll b/test/CodeGen/Hexagon/fsub.ll
new file mode 100644
index 0000000..07c866f
--- /dev/null
+++ b/test/CodeGen/Hexagon/fsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Check that we generate sp floating point subtract in V5.
+
+; CHECK: r{{[0-9]+}} = sfsub(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+  %a = alloca float, align 4
+  %b = alloca float, align 4
+  %c = alloca float, align 4
+  store float 0x402ECCCCC0000000, float* %a, align 4
+  store float 0x4022333340000000, float* %b, align 4
+  %0 = load float* %b, align 4
+  %1 = load float* %a, align 4
+  %sub = fsub float %0, %1
+  store float %sub, float* %c, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/fusedandshift.ll b/test/CodeGen/Hexagon/fusedandshift.ll
new file mode 100644
index 0000000..022b3c6
--- /dev/null
+++ b/test/CodeGen/Hexagon/fusedandshift.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4  < %s | FileCheck %s
+; Check that we generate fused logical and with shift instruction.
+
+; CHECK: r{{[0-9]+}} = and(#15, lsr(r{{[0-9]+}}, #{{[0-9]+}})
+
+define i32 @main(i16* %a, i16* %b) nounwind {
+  entry:
+  %0 = load i16* %a, align 2
+  %conv1 = sext i16 %0 to i32
+  %shr1 = ashr i32 %conv1, 3
+  %and1 = and i32 %shr1, 15
+  %conv2 = trunc i32 %and1 to i16
+  store i16 %conv2, i16* %b, align 2
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/macint.ll b/test/CodeGen/Hexagon/macint.ll
new file mode 100644
index 0000000..b3b9d0e
--- /dev/null
+++ b/test/CodeGen/Hexagon/macint.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4  < %s | FileCheck %s
+; Check that we generate integer multiply accumulate.
+
+; CHECK: r{{[0-9]+}} += mpyi(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main(i32* %a, i32* %b) nounwind {
+  entry:
+  %0 = load i32* %a, align 4
+  %div = udiv i32 %0, 10000
+  %rem = urem i32 %div, 10
+  store i32 %rem, i32* %b, align 4
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index afd6fc6..d5c5ae3 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: += mpyi
 
 define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
diff --git a/test/CodeGen/Hexagon/newvaluejump.ll b/test/CodeGen/Hexagon/newvaluejump.ll
new file mode 100644
index 0000000..9c7ca55
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluejump.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate new value jump.
+
+@i = global i32 0, align 4
+@j = global i32 10, align 4
+
+define i32 @foo(i32 %a) nounwind {
+entry:
+; CHECK: if (cmp.eq(r{{[0-9]+}}.new, #0)) jump{{.}}
+  %addr1 = alloca i32, align 4
+  %addr2 = alloca i32, align 4
+  %0 = load i32* @i, align 4
+  store i32 %0, i32* %addr1, align 4
+  call void @bar(i32 1, i32 2)
+  %1 = load i32* @j, align 4
+  %tobool = icmp ne i32 %1, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+  call void @baz(i32 1, i32 2)
+  br label %if.end
+
+if.else:
+  call void @guy(i32 10, i32 20)
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+
+declare void @guy(i32, i32)
+declare void @bar(i32, i32)
+declare void @baz(i32, i32)
diff --git a/test/CodeGen/Hexagon/newvaluejump2.ll b/test/CodeGen/Hexagon/newvaluejump2.ll
new file mode 100644
index 0000000..3d50ea5
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluejump2.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate new value jump, both registers, with one 
+; of the registers as new.
+
+@Reg = common global i8 0, align 1
+define i32 @main() nounwind {
+entry:
+; CHECK: if (cmp.gt(r{{[0-9]+}}.new, r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
+  %Reg2 = alloca i8, align 1
+  %0 = load i8* %Reg2, align 1
+  %conv0 = zext i8 %0 to i32
+  %1 = load i8* @Reg, align 1
+  %conv1 = zext i8 %1 to i32
+  %tobool = icmp sle i32 %conv0, %conv1
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+  call void @bar(i32 1, i32 2)
+  br label %if.end
+
+if.else:
+  call void @baz(i32 10, i32 20)
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+
+declare void @bar(i32, i32)
+declare void @baz(i32, i32)
diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll
new file mode 100644
index 0000000..ab69b22
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluestore.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4  < %s | FileCheck %s
+; Check that we generate new value store packet in V4
+
+@i = global i32 0, align 4
+@j = global i32 10, align 4
+@k = global i32 100, align 4
+
+define i32 @main() nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}.new
+  %number1 = alloca i32, align 4
+  %number2 = alloca i32, align 4
+  %number3 = alloca i32, align 4
+  %0 = load i32 * @i, align 4
+  store i32 %0, i32* %number1, align 4
+  %1 = load i32 * @j, align 4
+  store i32 %1, i32* %number2, align 4
+  %2 = load i32 * @k, align 4
+  store i32 %2, i32* %number3, align 4
+  ret i32 %0
+}
+
diff --git a/test/CodeGen/Hexagon/opt-fabs.ll b/test/CodeGen/Hexagon/opt-fabs.ll
new file mode 100644
index 0000000..31b56fd
--- /dev/null
+++ b/test/CodeGen/Hexagon/opt-fabs.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv5  < %s | FileCheck %s
+; Optimize fabsf to clrbit in V5.
+
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
+
+define float @my_fabsf(float %x) nounwind {
+entry:
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4
+  %0 = load float* %x.addr, align 4
+  %call = call float @fabsf(float %0) readnone
+  ret float %call
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/Hexagon/opt-fneg.ll b/test/CodeGen/Hexagon/opt-fneg.ll
new file mode 100644
index 0000000..479b4b6
--- /dev/null
+++ b/test/CodeGen/Hexagon/opt-fneg.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Optimize fneg to togglebit in V5.
+
+define float @foo(float %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4
+  %0 = load float* %x.addr, align 4
+  %sub = fsub float -0.000000e+00, %0
+  ret float %sub
+}
+
+define float @bar(float %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+  %sub = fsub float -0.000000e+00, %x
+  ret float %sub
+}
+
+define float @baz(float %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+  %conv1 = fmul float %x, -1.000000e+00
+  ret float %conv1
+}
diff --git a/test/CodeGen/Hexagon/simpletailcall.ll b/test/CodeGen/Hexagon/simpletailcall.ll
new file mode 100644
index 0000000..2876404
--- /dev/null
+++ b/test/CodeGen/Hexagon/simpletailcall.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: foo_empty
+; CHECK-NOT: allocframe
+; CHECK-NOT: memd(r29
+; CHECK: jump bar_empty
+
+define void @foo_empty(i32 %h) nounwind {
+entry:
+  %add = add nsw i32 %h, 3
+  %call = tail call i32 bitcast (i32 (...)* @bar_empty to i32 (i32)*)(i32 %add) nounwind
+  ret void
+}
+
+declare i32 @bar_empty(...)
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index c63a3ba..2e4ab63 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,13 +1,12 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
 @val = external global i32
 
-; CHECK: CONST32(#num)
-; CHECK: CONST32(#acc)
-; CHECK: CONST32(#val)
+; CHECK: memw(##num)
+; CHECK: memw(##acc)
+; CHECK: memw(##val)
 
 define void @foo() nounwind {
 entry:
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index 2c962d0..e488f33 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,6 +1,6 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
+; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
 
 %struct.small = type { i32, i32 }
 
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index 69de4f6..f09fd10 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,8 +1,7 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
-; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
-; CHECK: memw(r29 + #0) = r[[T1]]
+; CHECK: memw(r29 + #0) = r{{.}}
+; CHECK: memw(r29+#8) = r{{.}}
 
 %struct.large = type { i64, i64 }
 
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 788e474..01d2041 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll b/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
index b92477b..c3d69c7 100644
--- a/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
+++ b/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
@@ -5,9 +5,9 @@ target triple = "msp430-unknown-linux-gnu"
 
 define msp430_intrcc void @foo() nounwind {
 entry:
-	%fa = call i16* @llvm.frameaddress(i32 0)
-	store i16 0, i16* %fa
+	%fa = call i8* @llvm.frameaddress(i32 0)
+	store i8 0, i8* %fa
 	ret void
 }
 
-declare i16* @llvm.frameaddress(i32)
+declare i8* @llvm.frameaddress(i32)
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 45342e2..b9c17d9 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -4,7 +4,7 @@ target triple = "msp430-generic-generic"
 
 define i8 @mov(i8 %a, i8 %b) nounwind {
 ; CHECK: mov:
-; CHECK: mov.b	r14, r15
+; CHECK: mov.{{[bw]}} r14, r15
 	ret i8 %b
 }
 
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 519e4b9..9c547f1 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=mips -o %t
-; RUN: grep {c\\..*\\.s} %t | count 3
-; RUN: grep {bc1\[tf\]} %t | count 3
+; RUN: grep "c\..*\.s" %t | count 3
+; RUN: grep "bc1[tf]" %t | count 3
 
 ; FIXME: Disabled because branch instructions are generated where
 ; conditional move instructions are expected.
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index e85a749..e88e3d3 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
+; RUN: llc < %s -march=mips | grep "b[ne][eq]" | count 1
 
 ; FIXME: Disabled because branch instructions are generated where
 ; conditional move instructions are expected.
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index aaf6767..261fe9d 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -7,19 +7,22 @@ entry:
   %x = alloca i32, align 4                        ; <i32*> [#uses=2]
   store volatile i32 2, i32* %x, align 4
   %0 = load volatile i32* %x, align 4             ; <i32> [#uses=1]
-; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
-; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
-; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
-; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
-; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
-; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
-; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
-; PIC-O32: jr  $[[R1]]
-; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
-; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
-; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
-; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
-; PIC-N64: jr  $[[R1]]
+; STATIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; STATIC-O32: lw $[[R3:[0-9]+]], %lo($JTI0_0)($[[R2]])
+; PIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R1:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; PIC-O32: lw $[[R4:[0-9]+]], %lo($JTI0_0)($[[R2]])
+; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
+; PIC-O32: jr  $[[R5]]
+; PIC-N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3
+; PIC-N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]]
+; PIC-N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]])
+; PIC-N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
+; PIC-N64: jr  $[[R5]]
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
@@ -30,7 +33,6 @@ entry:
 bb1:                                              ; preds = %entry
   ret i32 2
 
-; CHECK: STATIC-O32: $BB0_2
 bb2:                                              ; preds = %entry
   ret i32 0
 
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 15c73e2..29f43c8 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -4,14 +4,10 @@ define i32 @twoalloca(i32 %size) nounwind {
 entry:
 ; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
 ; CHECK: addu  $sp, $zero, $[[T0]]
-; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]]
 ; CHECK: subu  $[[T2:[0-9]+]], $sp, $[[SZ]]
 ; CHECK: addu  $sp, $zero, $[[T2]]
-; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]]
-; CHECK: lw    $[[T4:[0-9]+]], %call16(foo)($gp)
-; CHECK: addu  $25, $zero, $[[T4]]
-; CHECK: addu  $4, $zero, $[[T1]]
-; CHECK: jalr  $25
+; CHECK: addu  $4, $zero, $[[T0]]
+; CHECK: addu  $4, $zero, $[[T2]]
   %tmp1 = alloca i8, i32 %size, align 4
   %add.ptr = getelementptr inbounds i8* %tmp1, i32 5
   store i8 97, i8* %add.ptr, align 1
@@ -31,14 +27,9 @@ declare i32 @foo(i8*)
 
 define i32 @alloca2(i32 %size) nounwind {
 entry:
-; dynamic allocated stack area and $gp restore slot have the same offsets
-; relative to $sp.
-;
 ; CHECK: alloca2
-; CHECK: .cprestore [[OFF:[0-9]+]]
-; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: subu  $[[T0:[0-9]+]], $sp
 ; CHECK: addu  $sp, $zero, $[[T0]]
-; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF]]
 
   %tmp1 = alloca i8, i32 %size, align 4
   %0 = bitcast i8* %tmp1 to i32*
@@ -46,7 +37,7 @@ entry:
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
-; CHECK: addiu $4, $[[T1]], 40
+; CHECK: addiu $4, $[[T0]], 40
 
   %add.ptr = getelementptr inbounds i8* %tmp1, i32 40
   %1 = bitcast i8* %add.ptr to i32*
@@ -56,7 +47,7 @@ if.then:                                          ; preds = %entry
   br label %if.end
 
 if.else:                                          ; preds = %entry
-; CHECK: addiu $4, $[[T1]], 12
+; CHECK: addiu $4, $[[T0]], 12
 
   %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12
   %2 = bitcast i8* %add.ptr5 to i32*
@@ -64,7 +55,7 @@ if.else:                                          ; preds = %entry
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-; CHECK: lw  $5, 0($[[T1]])
+; CHECK: lw  $5, 0($[[T0]])
 ; CHECK: lw  $25, %call16(printf)
 
   %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index bc5bcc3..8ec5d93 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -2,9 +2,8 @@
 
 define double @foo(double %a, double %b) nounwind readnone {
 entry:
-; CHECK: bc1f $BB0_2
+; CHECK: bc1f $BB
 ; CHECK: nop
-; CHECK: # BB#1:    
 
   %cmp = fcmp ogt double %a, 0.000000e+00
   br i1 %cmp, label %if.end6, label %if.else
@@ -26,9 +25,8 @@ return:                                           ; preds = %if.else, %if.end6
 
 define void @f1(float %f) nounwind {
 entry:
-; CHECK: bc1f $BB1_1
+; CHECK: bc1f $BB
 ; CHECK: nop
-; CHECK: # BB#2:
   %cmp = fcmp une float %f, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll
new file mode 100644
index 0000000..4ff1204
--- /dev/null
+++ b/test/CodeGen/Mips/and1.ll
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %1 = load i32* @y, align 4
+  %and = and i32 %0, %1
+; 16:	and	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %and)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/asm-large-immediate.ll b/test/CodeGen/Mips/asm-large-immediate.ll
new file mode 100644
index 0000000..246fff6
--- /dev/null
+++ b/test/CodeGen/Mips/asm-large-immediate.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+define void @test() {
+entry:
+; CHECK: /* result: 68719476738 */
+        tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+; CHECK: /* result: -68719476738 */
+        tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+        ret void
+}
+
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index a4763b1..050689d 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -8,7 +8,7 @@ entry:
   ret i32 %0
 
 ; CHECK:   AtomicLoadAdd32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
 ; CHECK:   $[[BB0:[A-Z_0-9]+]]:
 ; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
 ; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
@@ -22,7 +22,7 @@ entry:
   ret i32 %0
 
 ; CHECK:   AtomicLoadNand32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
 ; CHECK:   $[[BB0:[A-Z_0-9]+]]:
 ; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
 ; CHECK:   and     $[[R3:[0-9]+]], $[[R1]], $4
@@ -40,7 +40,7 @@ entry:
   ret i32 %0
 
 ; CHECK:   AtomicSwap32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
 ; CHECK:   $[[BB0:[A-Z_0-9]+]]:
 ; CHECK:   ll      ${{[0-9]+}}, 0($[[R0]])
 ; CHECK:   sc      $[[R2:[0-9]+]], 0($[[R0]])
@@ -56,7 +56,7 @@ entry:
   ret i32 %0
 
 ; CHECK:   AtomicCmpSwap32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
 ; CHECK:   $[[BB0:[A-Z_0-9]+]]:
 ; CHECK:   ll      $2, 0($[[R0]])
 ; CHECK:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
@@ -75,7 +75,7 @@ entry:
   ret i8 %0
 
 ; CHECK:   AtomicLoadAdd8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
 ; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
 ; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
@@ -106,7 +106,7 @@ entry:
   ret i8 %0
 
 ; CHECK:   AtomicLoadSub8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
 ; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
 ; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
@@ -137,7 +137,7 @@ entry:
   ret i8 %0
 
 ; CHECK:   AtomicLoadNand8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
 ; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
 ; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
@@ -169,7 +169,7 @@ entry:
   ret i8 %0
 
 ; CHECK:   AtomicSwap8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
 ; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
 ; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
@@ -198,7 +198,7 @@ entry:
   ret i8 %0
 
 ; CHECK:   AtomicCmpSwap8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
 ; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
 ; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
@@ -242,3 +242,19 @@ entry:
 ; CHECK:   sync 0
 }
 
+; make sure that this assertion in
+; TwoAddressInstructionPass::TryInstructionTransform does not fail:
+;
+; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+;
+; it failed when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an
+; operand of an atomic instruction with register $zero. 
+@a = external global i32
+
+define i32 @zeroreg() nounwind {
+entry:
+  %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst
+  %1 = icmp eq i32 %0, 1
+  %conv = zext i1 %1 to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 03254a9..3af899a 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -5,10 +5,12 @@
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; O32:  lw  ${{[0-9]+}}, %got(i3)($gp)
-; O32:  addiu ${{[0-9]+}}, $gp, %got(i1)
-; N64:  ld  ${{[0-9]+}}, %got_disp(i3)($gp)
-; N64:  daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
+; O32:  lw $[[R0:[0-9]+]], %got(i3)
+; O32:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1) 
+; O32:  movn $[[R0]], $[[R1]], ${{[0-9]+}} 
+; N64:  ldr $[[R0:[0-9]+]] 
+; N64:  ld $[[R1:[0-9]+]], %got_disp(i1)
+; N64:  movn $[[R0]], $[[R1]], ${{[0-9]+}} 
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -21,12 +23,12 @@ entry:
 @d = global i32 0, align 4
 
 ; O32: cmov2:
-; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: addiu $[[R1:[0-9]+]], ${{[a-z0-9]+}}, %got(d)
+; O32: addiu $[[R0:[0-9]+]], ${{[a-z0-9]+}}, %got(c)
 ; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
 ; N64: cmov2:
-; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
-; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
 ; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
 define i32 @cmov2(i32 %s) nounwind readonly {
 entry:
@@ -37,3 +39,23 @@ entry:
   ret i32 %cond
 }
 
+; O32: cmov3:
+; O32: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %cmp = icmp eq i32 %a, 234
+  %cond = select i1 %cmp, i32 %b, i32 %c
+  ret i32 %cond
+}
+
+; N64: cmov4:
+; N64: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
+; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
+entry:
+  %cmp = icmp eq i32 %a, 234
+  %cond = select i1 %cmp, i64 %b, i64 %c
+  ret i64 %cond
+}
+
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index 57d022f..a618b67 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; DISABLE: llc -march=mipsel < %s | FileCheck %s
+; RUN: false
+; XFAIL: *
 
 ; CHECK: .set macro
 ; CHECK: .set at
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 2e2f9a4..d14150a 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -15,7 +15,6 @@ entry:
 ; CHECK-EB:  .cfi_offset 53, -8
 ; CHECK-EB:  .cfi_offset 52, -4
 ; CHECK-EL:  .cfi_offset 31, -12
-; CHECK-EL:  .cprestore 
 
   %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
   %0 = bitcast i8* %exception to double*
@@ -25,7 +24,6 @@ entry:
 
 lpad:                                             ; preds = %entry
 ; CHECK-EL:  # %lpad
-; CHECK-EL:  lw  $gp
 ; CHECK-EL:  bne $5
 
   %exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
index b296ab3..49d8a72 100644
--- a/test/CodeGen/Mips/fabs.ll
+++ b/test/CodeGen/Mips/fabs.ll
@@ -1,8 +1,8 @@
-; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
-; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
-; RUN: llc  < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
 
 define float @foo0(float %a) nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
new file mode 100644
index 0000000..82919e7
--- /dev/null
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -0,0 +1,253 @@
+; RUN: llc  < %s -march=mipsel | FileCheck %s 
+
+@gi0 = external global i32
+@gi1 = external global i32
+@gi2 = external global i32
+@gi3 = external global i32
+@gi4 = external global i32
+@gi5 = external global i32
+@gi6 = external global i32
+@gi7 = external global i32
+@gi8 = external global i32
+@gi9 = external global i32
+@gi10 = external global i32
+@gi11 = external global i32
+@gi12 = external global i32
+@gi13 = external global i32
+@gi14 = external global i32
+@gi15 = external global i32
+@gi16 = external global i32
+@gfa0 = external global float
+@gfa1 = external global float
+@gfa2 = external global float
+@gfa3 = external global float
+@gfa4 = external global float
+@gfa5 = external global float
+@gfa6 = external global float
+@gfa7 = external global float
+@gfa8 = external global float
+@gfa9 = external global float
+@gfa10 = external global float
+@gfa11 = external global float
+@gfa12 = external global float
+@gfa13 = external global float
+@gfa14 = external global float
+@gfa15 = external global float
+@gfa16 = external global float
+@gfa17 = external global float
+@gfa18 = external global float
+@gfa19 = external global float
+@gfa20 = external global float
+@gf0 = external global float
+@gf1 = external global float
+@gf2 = external global float
+@gf3 = external global float
+@gf4 = external global float
+@gf5 = external global float
+@gf6 = external global float
+@gf7 = external global float
+@gf8 = external global float
+@gf9 = external global float
+@gf10 = external global float
+@gf11 = external global float
+@gf12 = external global float
+@gf13 = external global float
+@gf14 = external global float
+@gf15 = external global float
+@gf16 = external global float
+@gf17 = external global float
+@gf18 = external global float
+@gf19 = external global float
+@gf20 = external global float
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+@g3 = external global i32
+@g4 = external global i32
+@g5 = external global i32
+@g6 = external global i32
+@g7 = external global i32
+@g8 = external global i32
+@g9 = external global i32
+@g10 = external global i32
+@g11 = external global i32
+@g12 = external global i32
+@g13 = external global i32
+@g14 = external global i32
+@g15 = external global i32
+@g16 = external global i32
+
+define void @caller0() nounwind {
+entry:
+; CHECK: caller0
+; CHECK: lw  $3
+; CHECK: lw  $24
+; CHECK: lw  $15
+; CHECK: lw  $14
+; CHECK: lw  $13
+; CHECK: lw  $12
+; CHECK: lw  $11
+; CHECK: lw  $10
+; CHECK: lw  $9
+; CHECK: lw  $8
+; CHECK: lw  $7
+; CHECK: lw  $6
+; CHECK: lw  $5
+; CHECK: lw  $4
+
+  %0 = load i32* @gi0, align 4
+  %1 = load i32* @gi1, align 4
+  %2 = load i32* @gi2, align 4
+  %3 = load i32* @gi3, align 4
+  %4 = load i32* @gi4, align 4
+  %5 = load i32* @gi5, align 4
+  %6 = load i32* @gi6, align 4
+  %7 = load i32* @gi7, align 4
+  %8 = load i32* @gi8, align 4
+  %9 = load i32* @gi9, align 4
+  %10 = load i32* @gi10, align 4
+  %11 = load i32* @gi11, align 4
+  %12 = load i32* @gi12, align 4
+  %13 = load i32* @gi13, align 4
+  %14 = load i32* @gi14, align 4
+  %15 = load i32* @gi15, align 4
+  %16 = load i32* @gi16, align 4
+  tail call fastcc void @callee0(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16)
+  ret void
+}
+
+define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline {
+entry:
+; CHECK: callee0
+; CHECK: sw  $4
+; CHECK: sw  $5
+; CHECK: sw  $6
+; CHECK: sw  $7
+; CHECK: sw  $8
+; CHECK: sw  $9
+; CHECK: sw  $10
+; CHECK: sw  $11
+; CHECK: sw  $12
+; CHECK: sw  $13
+; CHECK: sw  $14
+; CHECK: sw  $15
+; CHECK: sw  $24
+; CHECK: sw  $3
+
+  store i32 %a0, i32* @g0, align 4
+  store i32 %a1, i32* @g1, align 4
+  store i32 %a2, i32* @g2, align 4
+  store i32 %a3, i32* @g3, align 4
+  store i32 %a4, i32* @g4, align 4
+  store i32 %a5, i32* @g5, align 4
+  store i32 %a6, i32* @g6, align 4
+  store i32 %a7, i32* @g7, align 4
+  store i32 %a8, i32* @g8, align 4
+  store i32 %a9, i32* @g9, align 4
+  store i32 %a10, i32* @g10, align 4
+  store i32 %a11, i32* @g11, align 4
+  store i32 %a12, i32* @g12, align 4
+  store i32 %a13, i32* @g13, align 4
+  store i32 %a14, i32* @g14, align 4
+  store i32 %a15, i32* @g15, align 4
+  store i32 %a16, i32* @g16, align 4
+  ret void
+}
+
+define void @caller1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind {
+entry:
+; CHECK: caller1
+; CHECK: lwc1  $f19
+; CHECK: lwc1  $f18
+; CHECK: lwc1  $f17
+; CHECK: lwc1  $f16
+; CHECK: lwc1  $f15
+; CHECK: lwc1  $f14
+; CHECK: lwc1  $f13
+; CHECK: lwc1  $f12
+; CHECK: lwc1  $f11
+; CHECK: lwc1  $f10
+; CHECK: lwc1  $f9
+; CHECK: lwc1  $f8
+; CHECK: lwc1  $f7
+; CHECK: lwc1  $f6
+; CHECK: lwc1  $f5
+; CHECK: lwc1  $f4
+; CHECK: lwc1  $f3
+; CHECK: lwc1  $f2
+; CHECK: lwc1  $f1
+; CHECK: lwc1  $f0
+
+  %0 = load float* @gfa0, align 4
+  %1 = load float* @gfa1, align 4
+  %2 = load float* @gfa2, align 4
+  %3 = load float* @gfa3, align 4
+  %4 = load float* @gfa4, align 4
+  %5 = load float* @gfa5, align 4
+  %6 = load float* @gfa6, align 4
+  %7 = load float* @gfa7, align 4
+  %8 = load float* @gfa8, align 4
+  %9 = load float* @gfa9, align 4
+  %10 = load float* @gfa10, align 4
+  %11 = load float* @gfa11, align 4
+  %12 = load float* @gfa12, align 4
+  %13 = load float* @gfa13, align 4
+  %14 = load float* @gfa14, align 4
+  %15 = load float* @gfa15, align 4
+  %16 = load float* @gfa16, align 4
+  %17 = load float* @gfa17, align 4
+  %18 = load float* @gfa18, align 4
+  %19 = load float* @gfa19, align 4
+  %20 = load float* @gfa20, align 4
+  tail call fastcc void @callee1(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20)
+  ret void
+}
+
+define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
+entry:
+; CHECK: callee1
+; CHECK: swc1  $f0
+; CHECK: swc1  $f1
+; CHECK: swc1  $f2
+; CHECK: swc1  $f3
+; CHECK: swc1  $f4
+; CHECK: swc1  $f5
+; CHECK: swc1  $f6
+; CHECK: swc1  $f7
+; CHECK: swc1  $f8
+; CHECK: swc1  $f9
+; CHECK: swc1  $f10
+; CHECK: swc1  $f11
+; CHECK: swc1  $f12
+; CHECK: swc1  $f13
+; CHECK: swc1  $f14
+; CHECK: swc1  $f15
+; CHECK: swc1  $f16
+; CHECK: swc1  $f17
+; CHECK: swc1  $f18
+; CHECK: swc1  $f19
+
+  store float %a0, float* @gf0, align 4
+  store float %a1, float* @gf1, align 4
+  store float %a2, float* @gf2, align 4
+  store float %a3, float* @gf3, align 4
+  store float %a4, float* @gf4, align 4
+  store float %a5, float* @gf5, align 4
+  store float %a6, float* @gf6, align 4
+  store float %a7, float* @gf7, align 4
+  store float %a8, float* @gf8, align 4
+  store float %a9, float* @gf9, align 4
+  store float %a10, float* @gf10, align 4
+  store float %a11, float* @gf11, align 4
+  store float %a12, float* @gf12, align 4
+  store float %a13, float* @gf13, align 4
+  store float %a14, float* @gf14, align 4
+  store float %a15, float* @gf15, align 4
+  store float %a16, float* @gf16, align 4
+  store float %a17, float* @gf17, align 4
+  store float %a18, float* @gf18, align 4
+  store float %a19, float* @gf19, align 4
+  store float %a20, float* @gf20, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
index 08bd6e7..1c4a3fd 100644
--- a/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -28,7 +28,7 @@ entry:
 
 define float @foo2(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK: luxc1
+; CHECK-NOT: luxc1
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   %0 = load float* %arrayidx1, align 1
   ret float %0
@@ -54,7 +54,7 @@ entry:
 
 define void @foo5(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK: suxc1
+; CHECK-NOT: suxc1
   %0 = load float* @gf, align 4
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   store float %0, float* %arrayidx1, align 1
@@ -64,7 +64,7 @@ entry:
 define double @foo6(i32 %b, i32 %c) nounwind readonly {
 entry:
 ; CHECK: foo6
-; CHECK-NOT: ldxc1
+; CHECK-NOT: luxc1
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   %0 = load double* %arrayidx1, align 1
   ret double %0
@@ -73,7 +73,7 @@ entry:
 define void @foo7(i32 %b, i32 %c) nounwind {
 entry:
 ; CHECK: foo7
-; CHECK-NOT: sdxc1
+; CHECK-NOT: suxc1
   %0 = load double* @gd, align 8
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   store double %0, double* %arrayidx1, align 1
@@ -83,7 +83,7 @@ entry:
 define float @foo8() nounwind readonly {
 entry:
 ; CHECK: foo8
-; CHECK: luxc1
+; CHECK-NOT: luxc1
   %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret float %0
 }
@@ -91,7 +91,7 @@ entry:
 define void @foo9(float %f) nounwind {
 entry:
 ; CHECK: foo9
-; CHECK: suxc1
+; CHECK-NOT: suxc1
   store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret void
 }
diff --git a/test/CodeGen/Mips/fp-spill-reload.ll b/test/CodeGen/Mips/fp-spill-reload.ll
new file mode 100644
index 0000000..f9887a5
--- /dev/null
+++ b/test/CodeGen/Mips/fp-spill-reload.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; check that $fp is not reserved. 
+
+define void @foo0(i32* nocapture %b) nounwind {
+entry:
+; CHECK: sw  $fp
+; CHECK: lw  $fp
+  %0 = load i32* %b, align 4
+  %arrayidx.1 = getelementptr inbounds i32* %b, i32 1
+  %1 = load i32* %arrayidx.1, align 4
+  %add.1 = add nsw i32 %1, 1
+  %arrayidx.2 = getelementptr inbounds i32* %b, i32 2
+  %2 = load i32* %arrayidx.2, align 4
+  %add.2 = add nsw i32 %2, 2
+  %arrayidx.3 = getelementptr inbounds i32* %b, i32 3
+  %3 = load i32* %arrayidx.3, align 4
+  %add.3 = add nsw i32 %3, 3
+  %arrayidx.4 = getelementptr inbounds i32* %b, i32 4
+  %4 = load i32* %arrayidx.4, align 4
+  %add.4 = add nsw i32 %4, 4
+  %arrayidx.5 = getelementptr inbounds i32* %b, i32 5
+  %5 = load i32* %arrayidx.5, align 4
+  %add.5 = add nsw i32 %5, 5
+  %arrayidx.6 = getelementptr inbounds i32* %b, i32 6
+  %6 = load i32* %arrayidx.6, align 4
+  %add.6 = add nsw i32 %6, 6
+  %arrayidx.7 = getelementptr inbounds i32* %b, i32 7
+  %7 = load i32* %arrayidx.7, align 4
+  %add.7 = add nsw i32 %7, 7
+  call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind
+  call void bitcast (void (...)* @foo1 to void ()*)() nounwind
+  call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind
+  ret void
+}
+
+declare void @foo2(i32, i32, i32, i32, i32, i32, i32, i32)
+
+declare void @foo1(...)
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
index 174d1f9..1c0eb01 100644
--- a/test/CodeGen/Mips/global-pointer-reg.ll
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+; DISABLED: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+; RUN: false
+; XFAIL: *
 
 @g0 = external global i32
 @g1 = external global i32
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
index ee7e131..cbcf0c9 100644
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; DISABLE: llc -march=mips < %s | FileCheck %s
+; RUN: false
+; XFAIL: *
 
 @p = external global i32
 @q = external global i32
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
new file mode 100644
index 0000000..bee93ac
--- /dev/null
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -0,0 +1,34 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+;
+; re-enable this when mips16's jalr is fixed.
+; DISABLED: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+
+
+@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+  ret i32 0
+
+; SR: 	.set	mips16                  # @main
+
+; SR:	save 	$ra, [[FS:[0-9]+]]
+; PE:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
+; PE: 	addiu	$[[T2:[0-9]+]], $pc, %lo(_gp_disp)
+; PE:	sll	$[[T3:[0-9]+]], $[[T1]], 16
+; C1:	lw	${{[0-9]+}}, %got($.str)(${{[0-9]+}})
+; C2:	lw	${{[0-9]+}}, %call16(printf)(${{[0-9]+}})
+; C1:	addiu	${{[0-9]+}}, %lo($.str)
+; C2:	move	$25, ${{[0-9]+}}
+; C1:	move 	$gp, ${{[0-9]+}}
+; C1:	jalr 	${{[0-9]+}}
+; SR:	restore 	$ra, [[FS]]
+; PE:	li	$2, 0
+; PE:	jr 	$ra
+
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
new file mode 100644
index 0000000..f9e53cb
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
@@ -0,0 +1,15 @@
+;
+;This is a negative test. The constant value given for the constraint
+;is greater than 16 bits.
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'I'
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
new file mode 100644
index 0000000..1fdf672
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (J)
+;is non-zero (3).
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'J'
+
+  tail call i32 asm "addi $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll
new file mode 100644
index 0000000..3baf437
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (K)
+;is greater than 16 bits (0x00100000).
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'K'
+
+  tail call i32 asm "addu $0,$1,$2", "=r,r,K"(i32 1024, i32 1048576) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
new file mode 100644
index 0000000..49dcc87
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (L)
+;is non-zero in the lower 16 bits (0x00100003).
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'L'
+
+  tail call i32 asm "addi $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
new file mode 100644
index 0000000..770669d
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
@@ -0,0 +1,17 @@
+ 
+;This is a negative test. The constant value given for the constraint (N).
+;immediate in the range of -65535 to -1 (inclusive).
+;Our example uses the positive value 3.
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'N'
+
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
new file mode 100644
index 0000000..cd4431a
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (O).
+;signed 15 bit immediate (+- 16383).
+;Our example uses the positive value 16384.
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'O'
+
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
new file mode 100644
index 0000000..0a4739e
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
@@ -0,0 +1,16 @@
+;
+; This is a negative test. The constant value given for the constraint (P).
+; A constant in the range of 1 to 655535 inclusive.
+; Our example uses the positive value 655536.
+;
+; RUN: not llc -march=mipsel < %s  2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'P'
+
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
new file mode 100644
index 0000000..94ded30
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -0,0 +1,44 @@
+; Positive test for inline register constraints
+;
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+
+; r with char
+;CHECK:	#APP
+;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},23
+;CHECK:	#NO_APP
+  tail call i8 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
+
+; r with short
+;CHECK:	#APP
+;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},13
+;CHECK:	#NO_APP
+  tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
+
+; r with int
+;CHECK:	#APP
+;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK:	#NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
+
+; Now c with 1024: make sure register $25 is picked
+; CHECK: #APP
+; CHECK: addi $25,${{[0-9]+}},1024
+; CHECK: #NO_APP	
+   tail call i32 asm sideeffect "addi $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
+
+; Now l with 1024: make sure register lo is picked. We do this by checking the instruction
+; after the inline expression for a mflo to pull the value out of lo.
+; CHECK: #APP
+; CHECK-NEXT:  mtlo ${{[0-9]+}} 
+; CHECK-NEXT:  madd ${{[0-9]+}},${{[0-9]+}}
+; CHECK-NEXT: #NO_APP	
+; CHECK-NEXT:  mflo	${{[0-9]+}}
+  %bosco = alloca i32, align 4
+  call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1,$2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
+  store volatile i32 %4, i32* %bosco, align 4
+ 
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
new file mode 100644
index 0000000..7870666
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -0,0 +1,20 @@
+;
+; Register constraint "r" shouldn't take long long unless
+; The target is 64 bit.
+;
+;
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+
+define i32 @main() nounwind {
+entry:
+
+
+; r with long long
+;CHECK:	#APP
+;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK:	#NO_APP
+  tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
+  ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
new file mode 100644
index 0000000..0197899
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -0,0 +1,153 @@
+; Positive test for inline register constraints
+;
+; RUN: llc -march=mipsel < %s  | FileCheck -check-prefix=CHECK_LITTLE_32 %s
+; RUN: llc -march=mips < %s  | FileCheck -check-prefix=CHECK_BIG_32 %s
+
+%union.u_tag = type { i64 }
+%struct.anon = type { i32, i32 }
+@uval = common global %union.u_tag zeroinitializer, align 8
+
+; X with -3
+define i32 @constraint_X() nounwind {
+entry:
+;CHECK_LITTLE_32:   constraint_X:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
+;CHECK_LITTLE_32: #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
+  ret i32 0
+}
+
+; x with -3
+define i32 @constraint_x() nounwind {
+entry:
+;CHECK_LITTLE_32:   constraint_x:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd
+;CHECK_LITTLE_32: #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
+  ret i32 0
+}
+
+; d with -3
+define i32 @constraint_d() nounwind {
+entry:
+;CHECK_LITTLE_32:   constraint_d:
+;CHECK_LITTLE_32:   #APP
+;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32:   #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
+  ret i32 0
+}
+
+; m with -3
+define i32 @constraint_m() nounwind {
+entry:
+;CHECK_LITTLE_32:   constraint_m:
+;CHECK_LITTLE_32:   #APP
+;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-4
+;CHECK_LITTLE_32:   #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
+  ret i32 0
+}
+
+; z with -3
+define i32 @constraint_z() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_z:
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32:    #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
+
+; z with 0
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},$0
+;CHECK_LITTLE_32:    #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+  ret i32 0
+}
+
+; a long long in 32 bit mode (use to assert)
+define i32 @constraint_longlong() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_longlong:
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK_LITTLE_32:    #NO_APP
+  tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
+  ret i32 0
+}
+
+; D, in little endian the source reg will be 4 bytes into the long long
+define i32 @constraint_D() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_D:
+;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+
+; D, in big endian the source reg will also be 4 bytes into the long long
+;CHECK_BIG_32:    constraint_D:
+;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32:       #APP
+;CHECK_BIG_32:       or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_BIG_32:       #NO_APP
+  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %trunc1 = trunc i64 %bosco to i32
+  tail call i32 asm sideeffect "or $0,${1:D},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+  ret i32 0
+}
+
+; L, in little endian the source reg will be 0 bytes into the long long
+define i32 @constraint_L() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_L:
+;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+; L, in big endian the source reg will be 4 bytes into the long long
+;CHECK_BIG_32: constraint_L:
+;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32:       #APP
+;CHECK_BIG_32:       or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_BIG_32:       #NO_APP
+  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %trunc1 = trunc i64 %bosco to i32
+  tail call i32 asm sideeffect "or $0,${1:L},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+  ret i32 0
+}
+
+; M, in little endian the source reg will be 4 bytes into the long long
+define i32 @constraint_M() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_M:
+;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+; M, in big endian the source reg will be 0 bytes into the long long
+;CHECK_BIG_32:    constraint_M:
+;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32:       #APP
+;CHECK_BIG_32:       or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
+;CHECK_BIG_32:       #NO_APP
+  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %trunc1 = trunc i64 %bosco to i32
+  tail call i32 asm sideeffect "or $0,${1:M},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
new file mode 100644
index 0000000..5adec3b
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+
+; First I with short
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},4096
+; CHECK: #NO_APP
+  tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
+
+; Then I with int
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP
+   tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
+
+; Now J with 0
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},0
+; CHECK: #NO_APP
+  tail call i32 asm sideeffect "addi $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+
+; Now K with 64
+; CHECK: #APP
+; CHECK: addu ${{[0-9]+}},${{[0-9]+}},64
+; CHECK: #NO_APP	
+  tail call i16 asm sideeffect "addu $0,$1,$2\0A\09 ", "=r,r,K"(i16 7, i16 64) nounwind
+
+; Now L with 0x00100000
+; CHECK: #APP
+; CHECK: add ${{[0-9]+}},${{[0-9]+}},${{[0-9]+}}
+; CHECK: #NO_APP	
+  tail call i32 asm sideeffect "add $0,$1,$3\0A\09", "=r,r,L,r"(i32 7, i32 1048576, i32 0) nounwind
+
+; Now N with -3
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP	
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
+
+; Now O with -3
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP	
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
+
+; Now P with 65535
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},65535
+; CHECK: #NO_APP	
+  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
+
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 4b31a88..1c7c443 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -11,7 +11,7 @@ entry:
 ; CHECK: #APP
 ; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
 ; CHECK: #NO_APP
-; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
+; CHECK: lw  $[[T1:[0-9]+]], %got(g1)
 ; CHECK: sw  $[[T3]], 0($[[T1]])
 
   %l1 = alloca i32, align 4
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index 434b386..863375a 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -6,7 +6,7 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
 entry:
-; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(f2)
 ; CHECK: addiu $25, $[[R0]], %lo(f2)
   tail call fastcc void @f2()
   ret i32 0
@@ -14,7 +14,7 @@ entry:
 
 define void @caller(i32 %a0, i32 %a1) nounwind {
 entry:
-; CHECK: lw  $[[R1:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: lw  $[[R1:[0-9]+]], %got(caller.sf1)
 ; CHECK: lw  $25, %lo(caller.sf1)($[[R1]])
   %tobool = icmp eq i32 %a1, 0
   br i1 %tobool, label %if.end, label %if.then
@@ -25,9 +25,9 @@ if.then:                                          ; preds = %entry
   br label %if.end
 
 if.end:                                           ; preds = %entry, %if.then
-; CHECK: lw  $[[R2:[0-9]+]], %got(sf2)($gp)
+; CHECK: lw  $[[R2:[0-9]+]], %got(sf2)
 ; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
-; CHECK: lw  $[[R3:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: lw  $[[R3:[0-9]+]], %got(caller.sf1)
 ; CHECK: sw  ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
   %tobool3 = icmp ne i32 %a0, 0
   %tmp4 = load void (...)** @gf1, align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index b7c9a9c..2e54879 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -6,10 +6,9 @@
 
 define void @f() nounwind {
 entry:
-; CHECK:  lui $at, 65534
-; CHECK:  addiu $at, $at, -24
+; CHECK:  lui $at, 65535
+; CHECK:  addiu $at, $at, -16
 ; CHECK:  addu  $sp, $sp, $at
-; CHECK:  .cprestore  65536
 
   %agg.tmp = alloca %struct.S1, align 1
   %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll
new file mode 100644
index 0000000..aac2767
--- /dev/null
+++ b/test/CodeGen/Mips/lb1.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@c = global i8 -1, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i8* @c, align 1
+; 16:	lb	${{[0-9]+}}, 0(${{[0-9]+}})
+  %conv = sext i8 %0 to i32
+  store i32 %conv, i32* %i, align 4
+  %1 = load i32* %i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll
new file mode 100644
index 0000000..63e0cca
--- /dev/null
+++ b/test/CodeGen/Mips/lbu1.ll
@@ -0,0 +1,19 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@c = global i8 97, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i8* @c, align 1
+  %conv = zext i8 %0 to i32
+; 16:	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
+  store i32 %conv, i32* %i, align 4
+  %1 = load i8* @c, align 1
+  %conv1 = zext i8 %1 to i32
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll
new file mode 100644
index 0000000..1f95b09
--- /dev/null
+++ b/test/CodeGen/Mips/lh1.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@s = global i16 -1, align 2
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i16* @s, align 2
+  %conv = sext i16 %0 to i32
+; 16:	lh	${{[0-9]+}}, 0(${{[0-9]+}})
+  store i32 %conv, i32* %i, align 4
+  %1 = load i32* %i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll
new file mode 100644
index 0000000..0cfcede
--- /dev/null
+++ b/test/CodeGen/Mips/lhu1.ll
@@ -0,0 +1,19 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+
+@s = global i16 255, align 2
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i16* @s, align 2
+  %conv = zext i16 %0 to i32
+; 16:	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
+  store i32 %conv, i32* %i, align 4
+  %1 = load i32* %i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
new file mode 100644
index 0000000..d0928ee
--- /dev/null
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel < %s | FileCheck  -check-prefix=EL %s
+; RUN: llc -march=mips < %s | FileCheck  -check-prefix=EB %s
+
+%struct.SI = type { i32 }
+
+@si = common global %struct.SI zeroinitializer, align 1
+
+define i32 @foo_load_i() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+  ret i32 %0
+}
+
+define void @foo_store_i(i32 %a) nounwind {
+entry:
+; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: swr $[[R0]], 0($[[R1]])
+; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: swr $[[R0]], 3($[[R1]])
+
+  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
new file mode 100644
index 0000000..0227b88
--- /dev/null
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -force-mips-long-branch < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch < %s | FileCheck %s -check-prefix=N64
+
+@g0 = external global i32
+
+define void @foo1(i32 %s) nounwind {
+entry:
+; O32: bal
+; N64: bal
+; N64: highest
+; N64: higher
+
+  %tobool = icmp eq i32 %s, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* @g0, align 4
+  %add = add nsw i32 %0, 12
+  store i32 %add, i32* @g0, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/machineverifier.ll b/test/CodeGen/Mips/machineverifier.ll
new file mode 100644
index 0000000..c673fe5
--- /dev/null
+++ b/test/CodeGen/Mips/machineverifier.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=mipsel -verify-machineinstrs
+; Make sure machine verifier understands the last instruction of a basic block
+; is not the terminator instruction after delay slot filler pass is run.
+
+@g = external global i32
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @g, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %0, 10
+  store i32 %add, i32* @g, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/memcpy.ll b/test/CodeGen/Mips/memcpy.ll
new file mode 100644
index 0000000..39764a9
--- /dev/null
+++ b/test/CodeGen/Mips/memcpy.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s 
+
+%struct.S1 = type { i32, [41 x i8] }
+
+@.str = private unnamed_addr constant [31 x i8] c"abcdefghijklmnopqrstuvwxyzABCD\00", align 1
+
+define void @foo1(%struct.S1* %s1, i8 signext %n) nounwind {
+entry:
+; CHECK-NOT: call16(memcpy
+
+  %arraydecay = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 0
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 40
+  store i8 %n, i8* %arrayidx, align 1
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
index 09745fb..bbdc05c 100644
--- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -30,7 +30,7 @@ entry:
 
 define float @foo2(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK: luxc1
+; CHECK-NOT: luxc1
   %idxprom = zext i32 %c to i64
   %idxprom1 = zext i32 %b to i64
   %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
@@ -60,7 +60,7 @@ entry:
 
 define void @foo5(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK: suxc1
+; CHECK-NOT: suxc1
   %0 = load float* @gf, align 4
   %idxprom = zext i32 %c to i64
   %idxprom1 = zext i32 %b to i64
@@ -72,7 +72,7 @@ entry:
 define double @foo6(i32 %b, i32 %c) nounwind readonly {
 entry:
 ; CHECK: foo6
-; CHECK-NOT: ldxc1
+; CHECK-NOT: luxc1
   %idxprom = zext i32 %c to i64
   %idxprom1 = zext i32 %b to i64
   %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
@@ -83,7 +83,7 @@ entry:
 define void @foo7(i32 %b, i32 %c) nounwind {
 entry:
 ; CHECK: foo7
-; CHECK-NOT: sdxc1
+; CHECK-NOT: suxc1
   %0 = load double* @gd, align 8
   %idxprom = zext i32 %c to i64
   %idxprom1 = zext i32 %b to i64
@@ -95,7 +95,7 @@ entry:
 define float @foo8() nounwind readonly {
 entry:
 ; CHECK: foo8
-; CHECK: luxc1
+; CHECK-NOT: luxc1
   %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
   ret float %0
 }
@@ -103,7 +103,7 @@ entry:
 define void @foo9(float %f) nounwind {
 entry:
 ; CHECK: foo9
-; CHECK: suxc1
+; CHECK-NOT: suxc1
   store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
   ret void
 }
diff --git a/test/CodeGen/Mips/mips64load-store-left-right.ll b/test/CodeGen/Mips/mips64load-store-left-right.ll
new file mode 100644
index 0000000..4561429
--- /dev/null
+++ b/test/CodeGen/Mips/mips64load-store-left-right.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EL %s
+; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EB %s
+
+%struct.SLL = type { i64 }
+%struct.SI = type { i32 }
+%struct.SUI = type { i32 }
+
+@sll = common global %struct.SLL zeroinitializer, align 1
+@si = common global %struct.SI zeroinitializer, align 1
+@sui = common global %struct.SUI zeroinitializer, align 1
+
+define i64 @foo_load_ll() nounwind readonly {
+entry:
+; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; EL: ldr $[[R0]], 0($[[R1]])
+; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: ldr $[[R0]], 7($[[R1]])
+
+  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret i64 %0
+}
+
+define i64 @foo_load_i() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+define i64 @foo_load_ui() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EL: daddiu $[[R2:[0-9]+]], $zero, 1
+; EL: dsll   $[[R3:[0-9]+]], $[[R2]], 32
+; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1
+; EL: and    ${{[0-9]+}}, $[[R0]], $[[R4]]
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+
+  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+  %conv = zext i32 %0 to i64
+  ret i64 %conv
+}
+
+define void @foo_store_ll(i64 %a) nounwind {
+entry:
+; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; EL: sdr $[[R0]], 0($[[R1]])
+; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: sdr $[[R0]], 7($[[R1]])
+
+  store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret void
+}
+
+define void @foo_store_i(i32 %a) nounwind {
+entry:
+; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: swr $[[R0]], 0($[[R1]])
+; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: swr $[[R0]], 3($[[R1]])
+
+  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/neg1.ll b/test/CodeGen/Mips/neg1.ll
new file mode 100644
index 0000000..281e626
--- /dev/null
+++ b/test/CodeGen/Mips/neg1.ll
@@ -0,0 +1,15 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %sub = sub nsw i32 0, %0
+; 16:	neg	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %sub)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll
new file mode 100644
index 0000000..2163b23
--- /dev/null
+++ b/test/CodeGen/Mips/not1.ll
@@ -0,0 +1,16 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %neg = xor i32 %0, -1
+; 16:	not	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %neg)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/null.ll b/test/CodeGen/Mips/null.ll
new file mode 100644
index 0000000..7beae99
--- /dev/null
+++ b/test/CodeGen/Mips/null.ll
@@ -0,0 +1,13 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 < %s | FileCheck %s -check-prefix=16
+
+
+define i32 @main() nounwind {
+entry:
+  ret i32 0
+
+; 16: 	.set	mips16                  # @main
+
+
+; 16:	jr	$ra
+
+}
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index c5cbc7a..eac0d80 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -10,19 +10,19 @@
 
 define void @f1() nounwind {
 entry:
-; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)($gp)
+; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)
 ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
 ; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
-; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
 ; CHECK: sw  $[[R6]], 36($sp)
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
 ; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
 ; CHECK: sw  $[[R4]], 28($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
 ; CHECK: sw  $[[R3]], 24($sp)
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
 ; CHECK: sw  $[[R7]], 20($sp)
+; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
 ; CHECK: sw  $[[R2]], 16($sp)
 ; CHECK: lw  $7, 4($[[R0]])
 ; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
@@ -43,16 +43,16 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
 
 define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
 entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $7, 68($sp)
-; CHECK: sw  $6, 64($sp)
-; CHECK: lw  $4, 88($sp)
-; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
-; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
-; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
-; CHECK: lw  $[[R2:[0-9]+]], 68($sp)
-; CHECK: lh  $[[R1:[0-9]+]], 66($sp)
-; CHECK: lb  $[[R0:[0-9]+]], 64($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw  $7, 60($sp)
+; CHECK: sw  $6, 56($sp)
+; CHECK: lw  $4, 80($sp)
+; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 64($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 68($sp)
+; CHECK: lw  $[[R2:[0-9]+]], 60($sp)
+; CHECK: lh  $[[R1:[0-9]+]], 58($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 56($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
 ; CHECK: sw  $[[R1]], 28($sp)
 ; CHECK: sw  $[[R2]], 24($sp)
@@ -80,13 +80,13 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
 
 define void @f3(%struct.S2* nocapture byval %s2) nounwind {
 entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $7, 68($sp)
-; CHECK: sw  $6, 64($sp)
-; CHECK: sw  $5, 60($sp)
-; CHECK: sw  $4, 56($sp)
-; CHECK: lw  $4, 56($sp)
-; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw  $7, 60($sp)
+; CHECK: sw  $6, 56($sp)
+; CHECK: sw  $5, 52($sp)
+; CHECK: sw  $4, 48($sp)
+; CHECK: lw  $4, 48($sp)
+; CHECK: lw  $[[R0:[0-9]+]], 60($sp)
 ; CHECK: sw  $[[R0]], 24($sp)
 
   %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
@@ -99,13 +99,13 @@ entry:
 
 define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
 entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $7, 68($sp)
-; CHECK: sw  $6, 64($sp)
-; CHECK: sw  $5, 60($sp)
-; CHECK: lw  $4, 68($sp)
-; CHECK: lw  $[[R1:[0-9]+]], 88($sp)
-; CHECK: lb  $[[R0:[0-9]+]], 60($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw  $7, 60($sp)
+; CHECK: sw  $6, 56($sp)
+; CHECK: sw  $5, 52($sp)
+; CHECK: lw  $4, 60($sp)
+; CHECK: lw  $[[R1:[0-9]+]], 80($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 52($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
 ; CHECK: sw  $[[R1]], 24($sp)
 
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 4a3d9ab..35332b6 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s
 
-
 ; All test functions do the same thing - they return the first variable
 ; argument.
 
diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll
new file mode 100644
index 0000000..b1c3696
--- /dev/null
+++ b/test/CodeGen/Mips/or1.ll
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %1 = load i32* @y, align 4
+  %or = or i32 %0, %1
+; 16:	or	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %or)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/ra-allocatable.ll b/test/CodeGen/Mips/ra-allocatable.ll
new file mode 100644
index 0000000..7621788
--- /dev/null
+++ b/test/CodeGen/Mips/ra-allocatable.ll
@@ -0,0 +1,288 @@
+; RUN: llc  < %s -march=mipsel | FileCheck %s
+
+@a0 = external global i32
+@b0 = external global i32*
+@a1 = external global i32
+@b1 = external global i32*
+@a2 = external global i32
+@b2 = external global i32*
+@a3 = external global i32
+@b3 = external global i32*
+@a4 = external global i32
+@b4 = external global i32*
+@a5 = external global i32
+@b5 = external global i32*
+@a6 = external global i32
+@b6 = external global i32*
+@a7 = external global i32
+@b7 = external global i32*
+@a8 = external global i32
+@b8 = external global i32*
+@a9 = external global i32
+@b9 = external global i32*
+@a10 = external global i32
+@b10 = external global i32*
+@a11 = external global i32
+@b11 = external global i32*
+@a12 = external global i32
+@b12 = external global i32*
+@a13 = external global i32
+@b13 = external global i32*
+@a14 = external global i32
+@b14 = external global i32*
+@a15 = external global i32
+@b15 = external global i32*
+@a16 = external global i32
+@b16 = external global i32*
+@a17 = external global i32
+@b17 = external global i32*
+@a18 = external global i32
+@b18 = external global i32*
+@a19 = external global i32
+@b19 = external global i32*
+@a20 = external global i32
+@b20 = external global i32*
+@a21 = external global i32
+@b21 = external global i32*
+@a22 = external global i32
+@b22 = external global i32*
+@a23 = external global i32
+@b23 = external global i32*
+@a24 = external global i32
+@b24 = external global i32*
+@a25 = external global i32
+@b25 = external global i32*
+@a26 = external global i32
+@b26 = external global i32*
+@a27 = external global i32
+@b27 = external global i32*
+@a28 = external global i32
+@b28 = external global i32*
+@a29 = external global i32
+@b29 = external global i32*
+@c0 = external global i32*
+@c1 = external global i32*
+@c2 = external global i32*
+@c3 = external global i32*
+@c4 = external global i32*
+@c5 = external global i32*
+@c6 = external global i32*
+@c7 = external global i32*
+@c8 = external global i32*
+@c9 = external global i32*
+@c10 = external global i32*
+@c11 = external global i32*
+@c12 = external global i32*
+@c13 = external global i32*
+@c14 = external global i32*
+@c15 = external global i32*
+@c16 = external global i32*
+@c17 = external global i32*
+@c18 = external global i32*
+@c19 = external global i32*
+@c20 = external global i32*
+@c21 = external global i32*
+@c22 = external global i32*
+@c23 = external global i32*
+@c24 = external global i32*
+@c25 = external global i32*
+@c26 = external global i32*
+@c27 = external global i32*
+@c28 = external global i32*
+@c29 = external global i32*
+
+define i32 @f1() nounwind {
+entry:
+; CHECK: sw  $ra, {{[0-9]+}}($sp)            # 4-byte Folded Spill
+; CHECK: $ra
+; CHECK: lw  $ra, {{[0-9]+}}($sp)            # 4-byte Folded Reload
+; CHECK: jr  $ra
+
+  %0 = load i32* @a0, align 4, !tbaa !0
+  %1 = load i32** @b0, align 4, !tbaa !3
+  store i32 %0, i32* %1, align 4, !tbaa !0
+  %2 = load i32* @a1, align 4, !tbaa !0
+  %3 = load i32** @b1, align 4, !tbaa !3
+  store i32 %2, i32* %3, align 4, !tbaa !0
+  %4 = load i32* @a2, align 4, !tbaa !0
+  %5 = load i32** @b2, align 4, !tbaa !3
+  store i32 %4, i32* %5, align 4, !tbaa !0
+  %6 = load i32* @a3, align 4, !tbaa !0
+  %7 = load i32** @b3, align 4, !tbaa !3
+  store i32 %6, i32* %7, align 4, !tbaa !0
+  %8 = load i32* @a4, align 4, !tbaa !0
+  %9 = load i32** @b4, align 4, !tbaa !3
+  store i32 %8, i32* %9, align 4, !tbaa !0
+  %10 = load i32* @a5, align 4, !tbaa !0
+  %11 = load i32** @b5, align 4, !tbaa !3
+  store i32 %10, i32* %11, align 4, !tbaa !0
+  %12 = load i32* @a6, align 4, !tbaa !0
+  %13 = load i32** @b6, align 4, !tbaa !3
+  store i32 %12, i32* %13, align 4, !tbaa !0
+  %14 = load i32* @a7, align 4, !tbaa !0
+  %15 = load i32** @b7, align 4, !tbaa !3
+  store i32 %14, i32* %15, align 4, !tbaa !0
+  %16 = load i32* @a8, align 4, !tbaa !0
+  %17 = load i32** @b8, align 4, !tbaa !3
+  store i32 %16, i32* %17, align 4, !tbaa !0
+  %18 = load i32* @a9, align 4, !tbaa !0
+  %19 = load i32** @b9, align 4, !tbaa !3
+  store i32 %18, i32* %19, align 4, !tbaa !0
+  %20 = load i32* @a10, align 4, !tbaa !0
+  %21 = load i32** @b10, align 4, !tbaa !3
+  store i32 %20, i32* %21, align 4, !tbaa !0
+  %22 = load i32* @a11, align 4, !tbaa !0
+  %23 = load i32** @b11, align 4, !tbaa !3
+  store i32 %22, i32* %23, align 4, !tbaa !0
+  %24 = load i32* @a12, align 4, !tbaa !0
+  %25 = load i32** @b12, align 4, !tbaa !3
+  store i32 %24, i32* %25, align 4, !tbaa !0
+  %26 = load i32* @a13, align 4, !tbaa !0
+  %27 = load i32** @b13, align 4, !tbaa !3
+  store i32 %26, i32* %27, align 4, !tbaa !0
+  %28 = load i32* @a14, align 4, !tbaa !0
+  %29 = load i32** @b14, align 4, !tbaa !3
+  store i32 %28, i32* %29, align 4, !tbaa !0
+  %30 = load i32* @a15, align 4, !tbaa !0
+  %31 = load i32** @b15, align 4, !tbaa !3
+  store i32 %30, i32* %31, align 4, !tbaa !0
+  %32 = load i32* @a16, align 4, !tbaa !0
+  %33 = load i32** @b16, align 4, !tbaa !3
+  store i32 %32, i32* %33, align 4, !tbaa !0
+  %34 = load i32* @a17, align 4, !tbaa !0
+  %35 = load i32** @b17, align 4, !tbaa !3
+  store i32 %34, i32* %35, align 4, !tbaa !0
+  %36 = load i32* @a18, align 4, !tbaa !0
+  %37 = load i32** @b18, align 4, !tbaa !3
+  store i32 %36, i32* %37, align 4, !tbaa !0
+  %38 = load i32* @a19, align 4, !tbaa !0
+  %39 = load i32** @b19, align 4, !tbaa !3
+  store i32 %38, i32* %39, align 4, !tbaa !0
+  %40 = load i32* @a20, align 4, !tbaa !0
+  %41 = load i32** @b20, align 4, !tbaa !3
+  store i32 %40, i32* %41, align 4, !tbaa !0
+  %42 = load i32* @a21, align 4, !tbaa !0
+  %43 = load i32** @b21, align 4, !tbaa !3
+  store i32 %42, i32* %43, align 4, !tbaa !0
+  %44 = load i32* @a22, align 4, !tbaa !0
+  %45 = load i32** @b22, align 4, !tbaa !3
+  store i32 %44, i32* %45, align 4, !tbaa !0
+  %46 = load i32* @a23, align 4, !tbaa !0
+  %47 = load i32** @b23, align 4, !tbaa !3
+  store i32 %46, i32* %47, align 4, !tbaa !0
+  %48 = load i32* @a24, align 4, !tbaa !0
+  %49 = load i32** @b24, align 4, !tbaa !3
+  store i32 %48, i32* %49, align 4, !tbaa !0
+  %50 = load i32* @a25, align 4, !tbaa !0
+  %51 = load i32** @b25, align 4, !tbaa !3
+  store i32 %50, i32* %51, align 4, !tbaa !0
+  %52 = load i32* @a26, align 4, !tbaa !0
+  %53 = load i32** @b26, align 4, !tbaa !3
+  store i32 %52, i32* %53, align 4, !tbaa !0
+  %54 = load i32* @a27, align 4, !tbaa !0
+  %55 = load i32** @b27, align 4, !tbaa !3
+  store i32 %54, i32* %55, align 4, !tbaa !0
+  %56 = load i32* @a28, align 4, !tbaa !0
+  %57 = load i32** @b28, align 4, !tbaa !3
+  store i32 %56, i32* %57, align 4, !tbaa !0
+  %58 = load i32* @a29, align 4, !tbaa !0
+  %59 = load i32** @b29, align 4, !tbaa !3
+  store i32 %58, i32* %59, align 4, !tbaa !0
+  %60 = load i32* @a0, align 4, !tbaa !0
+  %61 = load i32** @c0, align 4, !tbaa !3
+  store i32 %60, i32* %61, align 4, !tbaa !0
+  %62 = load i32* @a1, align 4, !tbaa !0
+  %63 = load i32** @c1, align 4, !tbaa !3
+  store i32 %62, i32* %63, align 4, !tbaa !0
+  %64 = load i32* @a2, align 4, !tbaa !0
+  %65 = load i32** @c2, align 4, !tbaa !3
+  store i32 %64, i32* %65, align 4, !tbaa !0
+  %66 = load i32* @a3, align 4, !tbaa !0
+  %67 = load i32** @c3, align 4, !tbaa !3
+  store i32 %66, i32* %67, align 4, !tbaa !0
+  %68 = load i32* @a4, align 4, !tbaa !0
+  %69 = load i32** @c4, align 4, !tbaa !3
+  store i32 %68, i32* %69, align 4, !tbaa !0
+  %70 = load i32* @a5, align 4, !tbaa !0
+  %71 = load i32** @c5, align 4, !tbaa !3
+  store i32 %70, i32* %71, align 4, !tbaa !0
+  %72 = load i32* @a6, align 4, !tbaa !0
+  %73 = load i32** @c6, align 4, !tbaa !3
+  store i32 %72, i32* %73, align 4, !tbaa !0
+  %74 = load i32* @a7, align 4, !tbaa !0
+  %75 = load i32** @c7, align 4, !tbaa !3
+  store i32 %74, i32* %75, align 4, !tbaa !0
+  %76 = load i32* @a8, align 4, !tbaa !0
+  %77 = load i32** @c8, align 4, !tbaa !3
+  store i32 %76, i32* %77, align 4, !tbaa !0
+  %78 = load i32* @a9, align 4, !tbaa !0
+  %79 = load i32** @c9, align 4, !tbaa !3
+  store i32 %78, i32* %79, align 4, !tbaa !0
+  %80 = load i32* @a10, align 4, !tbaa !0
+  %81 = load i32** @c10, align 4, !tbaa !3
+  store i32 %80, i32* %81, align 4, !tbaa !0
+  %82 = load i32* @a11, align 4, !tbaa !0
+  %83 = load i32** @c11, align 4, !tbaa !3
+  store i32 %82, i32* %83, align 4, !tbaa !0
+  %84 = load i32* @a12, align 4, !tbaa !0
+  %85 = load i32** @c12, align 4, !tbaa !3
+  store i32 %84, i32* %85, align 4, !tbaa !0
+  %86 = load i32* @a13, align 4, !tbaa !0
+  %87 = load i32** @c13, align 4, !tbaa !3
+  store i32 %86, i32* %87, align 4, !tbaa !0
+  %88 = load i32* @a14, align 4, !tbaa !0
+  %89 = load i32** @c14, align 4, !tbaa !3
+  store i32 %88, i32* %89, align 4, !tbaa !0
+  %90 = load i32* @a15, align 4, !tbaa !0
+  %91 = load i32** @c15, align 4, !tbaa !3
+  store i32 %90, i32* %91, align 4, !tbaa !0
+  %92 = load i32* @a16, align 4, !tbaa !0
+  %93 = load i32** @c16, align 4, !tbaa !3
+  store i32 %92, i32* %93, align 4, !tbaa !0
+  %94 = load i32* @a17, align 4, !tbaa !0
+  %95 = load i32** @c17, align 4, !tbaa !3
+  store i32 %94, i32* %95, align 4, !tbaa !0
+  %96 = load i32* @a18, align 4, !tbaa !0
+  %97 = load i32** @c18, align 4, !tbaa !3
+  store i32 %96, i32* %97, align 4, !tbaa !0
+  %98 = load i32* @a19, align 4, !tbaa !0
+  %99 = load i32** @c19, align 4, !tbaa !3
+  store i32 %98, i32* %99, align 4, !tbaa !0
+  %100 = load i32* @a20, align 4, !tbaa !0
+  %101 = load i32** @c20, align 4, !tbaa !3
+  store i32 %100, i32* %101, align 4, !tbaa !0
+  %102 = load i32* @a21, align 4, !tbaa !0
+  %103 = load i32** @c21, align 4, !tbaa !3
+  store i32 %102, i32* %103, align 4, !tbaa !0
+  %104 = load i32* @a22, align 4, !tbaa !0
+  %105 = load i32** @c22, align 4, !tbaa !3
+  store i32 %104, i32* %105, align 4, !tbaa !0
+  %106 = load i32* @a23, align 4, !tbaa !0
+  %107 = load i32** @c23, align 4, !tbaa !3
+  store i32 %106, i32* %107, align 4, !tbaa !0
+  %108 = load i32* @a24, align 4, !tbaa !0
+  %109 = load i32** @c24, align 4, !tbaa !3
+  store i32 %108, i32* %109, align 4, !tbaa !0
+  %110 = load i32* @a25, align 4, !tbaa !0
+  %111 = load i32** @c25, align 4, !tbaa !3
+  store i32 %110, i32* %111, align 4, !tbaa !0
+  %112 = load i32* @a26, align 4, !tbaa !0
+  %113 = load i32** @c26, align 4, !tbaa !3
+  store i32 %112, i32* %113, align 4, !tbaa !0
+  %114 = load i32* @a27, align 4, !tbaa !0
+  %115 = load i32** @c27, align 4, !tbaa !3
+  store i32 %114, i32* %115, align 4, !tbaa !0
+  %116 = load i32* @a28, align 4, !tbaa !0
+  %117 = load i32** @c28, align 4, !tbaa !3
+  store i32 %116, i32* %117, align 4, !tbaa !0
+  %118 = load i32* @a29, align 4, !tbaa !0
+  %119 = load i32** @c29, align 4, !tbaa !3
+  store i32 %118, i32* %119, align 4, !tbaa !0
+  %120 = load i32* @a0, align 4, !tbaa !0
+  ret i32 %120
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/Mips/rdhwr-directives.ll b/test/CodeGen/Mips/rdhwr-directives.ll
new file mode 100644
index 0000000..27010d4
--- /dev/null
+++ b/test/CodeGen/Mips/rdhwr-directives.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static < %s | FileCheck %s
+
+@a = external thread_local global i32
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK: .set  push
+; CHECK: .set  mips32r2
+; CHECK: rdhwr 
+; CHECK: .set  pop
+
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
diff --git a/test/CodeGen/Mips/return_address.ll b/test/CodeGen/Mips/return_address.ll
new file mode 100644
index 0000000..e1c9241
--- /dev/null
+++ b/test/CodeGen/Mips/return_address.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i8* @f1() nounwind {
+entry:
+  %0 = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+
+; CHECK:    addu    $2, $zero, $ra
+}
+
+define i8* @f2() nounwind {
+entry:
+  call void @g()
+  %0 = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+
+; CHECK:    addu    $[[R0:[0-9]+]], $zero, $ra
+; CHECK:    jal
+; CHECK:    addu    $2,  $zero, $[[R0]]
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+declare void @g()
diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll
new file mode 100644
index 0000000..e1a28d4
--- /dev/null
+++ b/test/CodeGen/Mips/sb1.ll
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 97, align 4
+@c = common global i8 0, align 1
+@.str = private unnamed_addr constant [8 x i8] c"%i %c \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %conv = trunc i32 %0 to i8
+  store i8 %conv, i8* @c, align 1
+  %1 = load i32* @i, align 4
+  %2 = load i8* @c, align 1
+  %conv1 = sext i8 %2 to i32
+; 16:	sb	${{[0-9]+}}, 0(${{[0-9]+}})
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/selectcc.ll b/test/CodeGen/Mips/selectcc.ll
new file mode 100644
index 0000000..a17517e
--- /dev/null
+++ b/test/CodeGen/Mips/selectcc.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel < %s
+
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define float @select_cc_f32(float %a, float %b) nounwind {
+entry:
+  store float 0.000000e+00, float* @gf0, align 4
+  store float 1.000000e+00, float* @gf1, align 4
+  %cmp = fcmp olt float %a, %b
+  %conv = zext i1 %cmp to i32
+  %conv1 = sitofp i32 %conv to float
+  ret float %conv1
+}
+
+define double @select_cc_f64(double %a, double %b) nounwind {
+entry:
+  store double 0.000000e+00, double* @gd0, align 8
+  store double 1.000000e+00, double* @gd1, align 8
+  %cmp = fcmp olt double %a, %b
+  %conv = zext i1 %cmp to i32
+  %conv1 = sitofp i32 %conv to double
+  ret double %conv1
+}
+
diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll
new file mode 100644
index 0000000..1746ae2
--- /dev/null
+++ b/test/CodeGen/Mips/sh1.ll
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 97, align 4
+@s = common global i16 0, align 2
+@.str = private unnamed_addr constant [9 x i8] c"%i %hi \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %conv = trunc i32 %0 to i16
+  store i16 %conv, i16* @s, align 2
+  %1 = load i32* @i, align 4
+  %2 = load i16* @s, align 2
+  %conv1 = sext i16 %2 to i32
+; 16:	sh	${{[0-9]+}}, 0(${{[0-9]+}})
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/shift-parts.ll b/test/CodeGen/Mips/shift-parts.ll
new file mode 100644
index 0000000..38cbf28
--- /dev/null
+++ b/test/CodeGen/Mips/shift-parts.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i64 @shl0(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: shl0
+; CHECK-NOT: lw $25, %call16(__
+  %sh_prom = zext i32 %b to i64
+  %shl = shl i64 %a, %sh_prom
+  ret i64 %shl
+}
+
+define i64 @shr1(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: shr1
+; CHECK-NOT: lw $25, %call16(__
+  %sh_prom = zext i32 %b to i64
+  %shr = lshr i64 %a, %sh_prom
+  ret i64 %shr
+}
+
+define i64 @sra2(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: sra2
+; CHECK-NOT: lw $25, %call16(__
+  %sh_prom = zext i32 %b to i64
+  %shr = ashr i64 %a, %sh_prom
+  ret i64 %shr
+}
+
diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
new file mode 100644
index 0000000..576cbd8
--- /dev/null
+++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@foo12.d4 = internal unnamed_addr global double 0.000000e+00, align 8
+
+define double @foo12(i32 %a, i32, i64 %b) nounwind {
+entry:
+; check that this transformation doesn't happen:
+; (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
+;
+; CHECK-NOT:   # double -1.000000e+00
+
+  %tobool1 = icmp ne i32 %a, 0
+  %not.tobool = icmp ne i64 %b, 0
+  %tobool1. = or i1 %tobool1, %not.tobool
+  %lor.ext = zext i1 %tobool1. to i32
+  %conv = sitofp i32 %lor.ext to double
+  %1 = load double* @foo12.d4, align 8
+  %add = fadd double %conv, %1
+  store double %add, double* @foo12.d4, align 8
+  ret double %add
+}
+
diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll
new file mode 100644
index 0000000..fdcd38c
--- /dev/null
+++ b/test/CodeGen/Mips/sll1.ll
@@ -0,0 +1,19 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+; 16:	sll	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+  %0 = load i32* @i, align 4
+  %shl = shl i32 %0, 4
+; 16:	sll	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+  store i32 %shl, i32* @j, align 4
+  %1 = load i32* @j, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll
new file mode 100644
index 0000000..c2af454
--- /dev/null
+++ b/test/CodeGen/Mips/sll2.ll
@@ -0,0 +1,19 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 4, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @j, align 4
+  %shl = shl i32 %0, %1
+; 16:	sllv	${{[0-9]+}}, ${{[0-9]+}}
+  store i32 %shl, i32* @i, align 4
+  %2 = load i32* @j, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll
new file mode 100644
index 0000000..15bf8d6
--- /dev/null
+++ b/test/CodeGen/Mips/sra1.ll
@@ -0,0 +1,15 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -354, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %shr = ashr i32 %0, 3
+; 16:	sra	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll
new file mode 100644
index 0000000..26bf19d
--- /dev/null
+++ b/test/CodeGen/Mips/sra2.ll
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -354, align 4
+@j = global i32 3, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @j, align 4
+  %shr = ashr i32 %0, %1
+; 16:	srav	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll
new file mode 100644
index 0000000..3474283
--- /dev/null
+++ b/test/CodeGen/Mips/srl1.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10654, align 4
+@j = global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %shr = lshr i32 %0, 4
+; 16:	srl	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+  store i32 %shr, i32* @j, align 4
+  %1 = load i32* @j, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll
new file mode 100644
index 0000000..26ec092
--- /dev/null
+++ b/test/CodeGen/Mips/srl2.ll
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10654, align 4
+@j = global i32 0, align 4
+@k = global i32 4, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @k, align 4
+  %shr = lshr i32 %0, %1
+; 16:	srlv	${{[0-9]+}}, ${{[0-9]+}}
+  store i32 %shr, i32* @j, align 4
+  %2 = load i32* @j, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/stacksize.ll b/test/CodeGen/Mips/stacksize.ll
new file mode 100644
index 0000000..42021b2
--- /dev/null
+++ b/test/CodeGen/Mips/stacksize.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s 
+
+define i32 @foo(i32 %a) nounwind readnone {
+entry:
+; check that stack size is zero.
+; CHECK-NOT: addiu $sp, $sp
+  %add = add nsw i32 %a, 1
+  ret i32 %add
+}
diff --git a/test/CodeGen/Mips/sub1.ll b/test/CodeGen/Mips/sub1.ll
new file mode 100644
index 0000000..195750b
--- /dev/null
+++ b/test/CodeGen/Mips/sub1.ll
@@ -0,0 +1,15 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %sub = sub nsw i32 %0, 5
+; 16:	addiu	${{[0-9]+}}, -{{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll
new file mode 100644
index 0000000..4f6bfcc
--- /dev/null
+++ b/test/CodeGen/Mips/sub2.ll
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 20, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @i, align 4
+  %sub = sub nsw i32 %0, %1
+; 16:	subu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
index da1e036..9f91a39 100644
--- a/test/CodeGen/Mips/swzero.ll
+++ b/test/CodeGen/Mips/swzero.ll
@@ -4,7 +4,8 @@
 
 define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
 entry:
-; CHECK: usw $zero
+; CHECK: swl $zero
+; CHECK: swr $zero
   %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
   store i32 0, i32* %x, align 1
   ret void
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
new file mode 100644
index 0000000..d681091
--- /dev/null
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s
+
+@foo = thread_local global i32 42
+@bar = hidden alias i32* @foo
+
+define i32* @zed() {
+; CHECK: __tls_get_addr
+; CHECK-NEXT: %tlsgd(bar)
+       ret i32* @bar
+}
diff --git a/test/CodeGen/Mips/tls-models.ll b/test/CodeGen/Mips/tls-models.ll
new file mode 100644
index 0000000..8f5789e
--- /dev/null
+++ b/test/CodeGen/Mips/tls-models.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck -check-prefix=CHECK-NONPIC %s
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+  ret i32* @external_gd
+
+  ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+  ; CHECK-NONPIC:   f1:
+  ; CHECK-NONPIC:   %gottprel
+  ; CHECK-PIC:      f1:
+  ; CHECK-PIC:      %tlsgd
+}
+
+define i32* @f2() {
+entry:
+  ret i32* @internal_gd
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic.
+  ; CHECK-NONPIC:   f2:
+  ; CHECK-NONPIC:   %tprel_hi
+  ; CHECK-PIC:      f2:
+  ; CHECK-PIC:      %tlsldm
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+  ret i32* @external_ld
+
+  ; Non-PIC code can use initial exec, PIC should use local dynamic.
+  ; CHECK-NONPIC:   f3:
+  ; CHECK-NONPIC:   %gottprel
+  ; CHECK-PIC:      f3:
+  ; CHECK-PIC:      %tlsldm
+}
+
+define i32* @f4() {
+entry:
+  ret i32* @internal_ld
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic.
+  ; CHECK-NONPIC:   f4:
+  ; CHECK-NONPIC:   %tprel_hi
+  ; CHECK-PIC:      f4:
+  ; CHECK-PIC:      %tlsldm
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+  ret i32* @external_ie
+
+  ; Non-PIC and PIC code will use initial exec as specified.
+  ; CHECK-NONPIC:   f5:
+  ; CHECK-NONPIC:   %gottprel
+  ; CHECK-PIC:      f5:
+  ; CHECK-PIC:      %gottprel
+}
+
+define i32* @f6() {
+entry:
+  ret i32* @internal_ie
+
+  ; Non-PIC code can use local exec, PIC code use initial exec as specified.
+  ; CHECK-NONPIC:   f6:
+  ; CHECK-NONPIC:   %tprel_hi
+  ; CHECK-PIC:      f6:
+  ; CHECK-PIC:      %gottprel
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+  ret i32* @external_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; CHECK-NONPIC:   f7:
+  ; CHECK-NONPIC:   %tprel_hi
+  ; CHECK-PIC:      f7:
+  ; CHECK-PIC:      %tprel_hi
+}
+
+define i32* @f8() {
+entry:
+  ret i32* @internal_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; CHECK-NONPIC:   f8:
+  ; CHECK-NONPIC:   %tprel_hi
+  ; CHECK-PIC:      f8:
+  ; CHECK-PIC:      %tprel_hi
+}
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index a3c4768..a7ddb96 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -13,8 +13,9 @@ entry:
 
 ; CHECK: f1:
 
-; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
-; PIC:   addiu   $4, $gp, %tlsgd(t1)
+; PIC:   addu    $[[R0:[a-z0-9]+]], $2, $25
+; PIC:   lw      $25, %call16(__tls_get_addr)($[[R0]])
+; PIC:   addiu   $4, $[[R0]], %tlsgd(t1)
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
@@ -35,18 +36,19 @@ entry:
 
 ; CHECK: f2:
 
-; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
-; PIC:   addiu   $4, $gp, %tlsgd(t2)
+; PIC:   addu    $[[R0:[a-z0-9]+]], $2, $25
+; PIC:   lw      $25, %call16(__tls_get_addr)($[[R0]])
+; PIC:   addiu   $4, $[[R0]], %tlsgd(t2)
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
 ; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
 ; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
 ; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
-; STATIC:   lui     $gp, %hi(__gnu_local_gp)
-; STATIC:   addiu   $gp, $gp, %lo(__gnu_local_gp)
+; STATIC:   lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATIC:   addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
-; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
+; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($[[GP]])
 ; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
 ; STATIC:   lw      $2, 0($[[R1]])
 }
@@ -57,7 +59,7 @@ define i32 @f3() nounwind {
 entry:
 ; CHECK: f3:
 
-; PIC:   addiu   $4, $gp, %tlsldm(f3.i)
+; PIC:   addiu   $4, ${{[a-z0-9]+}}, %tlsldm(f3.i)
 ; PIC:   jalr    $25
 ; PIC:   lui     $[[R0:[0-9]+]], %dtprel_hi(f3.i)
 ; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 6a087ba..7f880b6 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,27 +9,17 @@
 
 define void @foo1() nounwind {
 entry:
-; CHECK-EL: ulhu  $4, 2
-; CHECK-EL: lw  $25, %call16(foo2)
-; CHECK-EL: lw  $[[R0:[0-9]+]], %got(s4)
-; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
-; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
-; CHECK-EL: ulhu  $[[R2:[0-9]+]], 4($[[R0]])
-; CHECK-EL: or  $5, $[[R2]], $[[R3]]
-; CHECK-EL: ulw $4, 0($[[R0]])
-; CHECK-EL: lw  $25, %call16(foo4)
+; CHECK-EL: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
+; CHECK-EL: lbu ${{[0-9]+}}, 3($[[R0]])
+; CHECK-EL: jalr
+; CHECK-EL: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
+; CHECK-EL: lwr $[[R1]], 0($[[R2]])
 
-; CHECK-EB: ulhu  $[[R0:[0-9]+]], 2
-; CHECK-EB: sll $4, $[[R0]], 16
-; CHECK-EB: lw  $25, %call16(foo2)
-; CHECK-EB: lw  $[[R1:[0-9]+]], %got(s4)
-; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
-; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
-; CHECK-EB: ulhu  $[[R2:[0-9]+]], 4($[[R1]])
-; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
-; CHECK-EB: or  $5, $[[R4]], $[[R5]]
-; CHECK-EB: ulw $4, 0($[[R1]])
-; CHECK-EB: lw  $25, %call16(foo4)
+; CHECK-EB: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
+; CHECK-EB: lbu ${{[0-9]+}}, 2($[[R0]])
+; CHECK-EB: jalr
+; CHECK-EB: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
+; CHECK-BE: lwr $[[R1]], 3($[[R2]])
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
   tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll
new file mode 100644
index 0000000..f2c1316
--- /dev/null
+++ b/test/CodeGen/Mips/xor1.ll
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %1 = load i32* @y, align 4
+  %xor = xor i32 %0, %1
+; 16:	xor	${{[0-9]+}}, ${{[0-9]+}}
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %xor)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index b890e1d..79ed609 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -4,8 +4,7 @@
 
 define i32 @foo0(i32 %s) nounwind readonly {
 entry:
-; CHECK-NOT: addiu
-; CHECK:     movn
+; CHECK:     movn ${{[0-9]+}}, $zero
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4, !tbaa !0
   %cond = select i1 %tobool, i32 0, i32 %0
@@ -14,8 +13,7 @@ entry:
 
 define i32 @foo1(i32 %s) nounwind readonly {
 entry:
-; CHECK-NOT: addiu
-; CHECK:     movz
+; CHECK:     movz ${{[0-9]+}}, $zero
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4, !tbaa !0
   %cond = select i1 %tobool, i32 %0, i32 0
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
new file mode 100644
index 0000000..d93f688
--- /dev/null
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+@texture = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .texref texture
+@surface = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .surfref surface
+
+
+; CHECK: .entry kernel_func_maxntid
+define void @kernel_func_maxntid(float* %a) {
+; CHECK: .maxntid 10, 20, 30
+; CHECK: ret
+  ret void
+}
+
+; CHECK: .entry kernel_func_reqntid
+define void @kernel_func_reqntid(float* %a) {
+; CHECK: .reqntid 11, 22, 33
+; CHECK: ret
+  ret void
+}
+
+; CHECK: .entry kernel_func_minctasm
+define void @kernel_func_minctasm(float* %a) {
+; CHECK: .minnctapersm 42
+; CHECK: ret
+  ret void
+}
+
+
+
+!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
+
+!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
+!2 = metadata !{void (float*)* @kernel_func_maxntid,
+                metadata !"maxntidx", i32 10,
+                metadata !"maxntidy", i32 20,
+                metadata !"maxntidz", i32 30}
+
+!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
+!4 = metadata !{void (float*)* @kernel_func_reqntid,
+                metadata !"reqntidx", i32 11,
+                metadata !"reqntidy", i32 22,
+                metadata !"reqntidz", i32 33}
+
+!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
+!6 = metadata !{void (float*)* @kernel_func_minctasm,
+                metadata !"minctasm", i32 42}
+
+!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
+!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
new file mode 100644
index 0000000..73c77f5
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd double %a, %b
+  ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub double %a, %b
+  ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul double %a, %b
+  ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv double %a, %b
+  ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd float %a, %b
+  ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub float %a, %b
+  ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul float %a, %b
+  ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv float %a, %b
+  ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
new file mode 100644
index 0000000..e474fa4
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd double %a, %b
+  ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub double %a, %b
+  ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul double %a, %b
+  ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv double %a, %b
+  ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd float %a, %b
+  ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub float %a, %b
+  ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul float %a, %b
+  ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv float %a, %b
+  ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll
new file mode 100644
index 0000000..529f849
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @add_i64(i64 %a, i64 %b) {
+; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = add i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @sub_i64(i64 %a, i64 %b) {
+; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @mul_i64(i64 %a, i64 %b) {
+; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @sdiv_i64(i64 %a, i64 %b) {
+; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @udiv_i64(i64 %a, i64 %b) {
+; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @srem_i64(i64 %a, i64 %b) {
+; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @urem_i64(i64 %a, i64 %b) {
+; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @and_i64(i64 %a, i64 %b) {
+; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = and i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @or_i64(i64 %a, i64 %b) {
+; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = or i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @xor_i64(i64 %a, i64 %b) {
+; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @shl_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @ashr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @lshr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i64 %a, %b
+  ret i64 %ret
+}
+
+
+;;; i32
+
+define i32 @add_i32(i32 %a, i32 %b) {
+; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = add i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @sub_i32(i32 %a, i32 %b) {
+; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @mul_i32(i32 %a, i32 %b) {
+; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @sdiv_i32(i32 %a, i32 %b) {
+; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @udiv_i32(i32 %a, i32 %b) {
+; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @srem_i32(i32 %a, i32 %b) {
+; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @urem_i32(i32 %a, i32 %b) {
+; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @and_i32(i32 %a, i32 %b) {
+; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = and i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @or_i32(i32 %a, i32 %b) {
+; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = or i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @xor_i32(i32 %a, i32 %b) {
+; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @shl_i32(i32 %a, i32 %b) {
+; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @ashr_i32(i32 %a, i32 %b) {
+; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @lshr_i32(i32 %a, i32 %b) {
+; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i32 %a, %b
+  ret i32 %ret
+}
+
+;;; i16
+
+define i16 @add_i16(i16 %a, i16 %b) {
+; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = add i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @sub_i16(i16 %a, i16 %b) {
+; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @mul_i16(i16 %a, i16 %b) {
+; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @sdiv_i16(i16 %a, i16 %b) {
+; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @udiv_i16(i16 %a, i16 %b) {
+; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @srem_i16(i16 %a, i16 %b) {
+; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @urem_i16(i16 %a, i16 %b) {
+; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @and_i16(i16 %a, i16 %b) {
+; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = and i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @or_i16(i16 %a, i16 %b) {
+; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = or i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @xor_i16(i16 %a, i16 %b) {
+; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @shl_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @ashr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @lshr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i16 %a, %b
+  ret i16 %ret
+}
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
new file mode 100644
index 0000000..968203e
--- /dev/null
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Kernel function using ptx_kernel calling conv
+
+; CHECK: .entry kernel_func
+define ptx_kernel void @kernel_func(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+;; Device function
+; CHECK: .func device_func
+define void @device_func(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+;; Kernel function using NVVM metadata
+; CHECK: .entry metadata_kernel
+define void @metadata_kernel(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
new file mode 100644
index 0000000..12fc754
--- /dev/null
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -0,0 +1,389 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @icmp_eq_i64(i64 %a, i64 %b) {
+; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ne_i64(i64 %a, i64 %b) {
+; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_uge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ult_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ule_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_slt_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sle_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+;;; i32
+
+define i32 @icmp_eq_i32(i32 %a, i32 %b) {
+; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ne_i32(i32 %a, i32 %b) {
+; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_uge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ult_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ule_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_slt_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sle_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+
+;;; i16
+
+define i16 @icmp_eq_i16(i16 %a, i16 %b) {
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_ne_i16(i16 %a, i16 %b) {
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_uge_i16(i16 %a, i16 %b) {
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_ult_i16(i16 %a, i16 %b) {
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_ule_i16(i16 %a, i16 %b) {
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_sge_i16(i16 %a, i16 %b) {
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_slt_i16(i16 %a, i16 %b) {
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+define i16 @icmp_sle_i16(i16 %a, i16 %b) {
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i16 %a, %b
+  %ret = zext i1 %cmp to i16
+  ret i16 %ret
+}
+
+
+;;; i8
+
+define i8 @icmp_eq_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_ne_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_uge_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_ult_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_ule_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_sge_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_slt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
+
+define i8 @icmp_sle_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i8 %a, %b
+  %ret = zext i1 %cmp to i8
+  ret i8 %ret
+}
diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll
new file mode 100644
index 0000000..21c8437
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-fp.ll
@@ -0,0 +1,146 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+define i16 @cvt_i16_f32(float %x) {
+; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_f64(double %x) {
+; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i16
+  ret i16 %a
+}
+
+define i32 @cvt_i32_f32(float %x) {
+; CHECK: cvt.rzi.u32.f32 %r{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_f64(double %x) {
+; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i32
+  ret i32 %a
+}
+
+
+define i64 @cvt_i64_f32(float %x) {
+; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_f64(double %x) {
+; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i64
+  ret i64 %a
+}
+
+define float @cvt_f32_i16(i16 %x) {
+; CHECK: cvt.rn.f32.u16 %f{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i16 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_i32(i32 %x) {
+; CHECK: cvt.rn.f32.u32 %f{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i32 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_i64(i64 %x) {
+; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i64 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_f64(double %x) {
+; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptrunc double %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s16(i16 %x) {
+; CHECK: cvt.rn.f32.s16 %f{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i16 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s32(i32 %x) {
+; CHECK: cvt.rn.f32.s32 %f{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i32 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s64(i64 %x) {
+; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i64 %x to float
+  ret float %a
+}
+
+define double @cvt_f64_i16(i16 %x) {
+; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i16 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_i32(i32 %x) {
+; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i32 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_i64(i64 %x) {
+; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i64 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_f32(float %x) {
+; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fpext float %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s16(i16 %x) {
+; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i16 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s32(i32 %x) {
+; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i32 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s64(i64 %x) {
+; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i64 %x to double
+  ret double %a
+}
diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll
new file mode 100644
index 0000000..20716f9
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm10.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i32 %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i64 %x to i16
+  ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = zext i16 %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i64 %x to i32
+  ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = zext i16 %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = zext i32 %x to i64
+  ret i64 %a
+}
diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll
new file mode 100644
index 0000000..fad240e
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Integer conversions happen inplicitly by loading/storing the proper types
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+  %a = trunc i32 %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+  %a = trunc i64 %x to i16
+  ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+  %a = zext i16 %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+  %a = trunc i64 %x to i32
+  ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+  %a = zext i16 %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+  %a = zext i32 %x to i64
+  ret i64 %a
+}
diff --git a/test/CodeGen/NVPTX/fma-disable.ll b/test/CodeGen/NVPTX/fma-disable.ll
new file mode 100644
index 0000000..bdd7401
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma-disable.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+; FMA: fma.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+; FMA: fma.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll
new file mode 100644
index 0000000..4ef1a9a
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
index 9f37ead..1c9879c 100644
--- a/test/CodeGen/PTX/intrinsic.ll
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,231 +1,234 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
 define ptx_device i32 @test_tid_x() {
-; CHECK: mov.u32 %ret0, %tid.x;
+; CHECK: mov.u32 %r0, %tid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_y() {
-; CHECK: mov.u32 %ret0, %tid.y;
+; CHECK: mov.u32 %r0, %tid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_z() {
-; CHECK: mov.u32 %ret0, %tid.z;
+; CHECK: mov.u32 %r0, %tid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_w() {
-; CHECK: mov.u32 %ret0, %tid.w;
+; CHECK: mov.u32 %r0, %tid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_x() {
-; CHECK: mov.u32 %ret0, %ntid.x;
+; CHECK: mov.u32 %r0, %ntid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_y() {
-; CHECK: mov.u32 %ret0, %ntid.y;
+; CHECK: mov.u32 %r0, %ntid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_z() {
-; CHECK: mov.u32 %ret0, %ntid.z;
+; CHECK: mov.u32 %r0, %ntid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_w() {
-; CHECK: mov.u32 %ret0, %ntid.w;
+; CHECK: mov.u32 %r0, %ntid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_laneid() {
-; CHECK: mov.u32 %ret0, %laneid;
+; CHECK: mov.u32 %r0, %laneid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.laneid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_warpid() {
-; CHECK: mov.u32 %ret0, %warpid;
+; CHECK: mov.u32 %r0, %warpid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.warpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nwarpid() {
-; CHECK: mov.u32 %ret0, %nwarpid;
+; CHECK: mov.u32 %r0, %nwarpid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nwarpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_x() {
-; CHECK: mov.u32 %ret0, %ctaid.x;
+; CHECK: mov.u32 %r0, %ctaid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_y() {
-; CHECK: mov.u32 %ret0, %ctaid.y;
+; CHECK: mov.u32 %r0, %ctaid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_z() {
-; CHECK: mov.u32 %ret0, %ctaid.z;
+; CHECK: mov.u32 %r0, %ctaid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_w() {
-; CHECK: mov.u32 %ret0, %ctaid.w;
+; CHECK: mov.u32 %r0, %ctaid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_x() {
-; CHECK: mov.u32 %ret0, %nctaid.x;
+; CHECK: mov.u32 %r0, %nctaid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_y() {
-; CHECK: mov.u32 %ret0, %nctaid.y;
+; CHECK: mov.u32 %r0, %nctaid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_z() {
-; CHECK: mov.u32 %ret0, %nctaid.z;
+; CHECK: mov.u32 %r0, %nctaid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_w() {
-; CHECK: mov.u32 %ret0, %nctaid.w;
+; CHECK: mov.u32 %r0, %nctaid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_smid() {
-; CHECK: mov.u32 %ret0, %smid;
+; CHECK: mov.u32 %r0, %smid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.smid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nsmid() {
-; CHECK: mov.u32 %ret0, %nsmid;
+; CHECK: mov.u32 %r0, %nsmid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nsmid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_gridid() {
-; CHECK: mov.u32 %ret0, %gridid;
+; CHECK: mov.u32 %r0, %gridid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.gridid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_eq() {
-; CHECK: mov.u32 %ret0, %lanemask_eq;
+; CHECK: mov.u32 %r0, %lanemask_eq;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.eq()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_le() {
-; CHECK: mov.u32 %ret0, %lanemask_le;
+; CHECK: mov.u32 %r0, %lanemask_le;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.le()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_lt() {
-; CHECK: mov.u32 %ret0, %lanemask_lt;
+; CHECK: mov.u32 %r0, %lanemask_lt;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.lt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_ge() {
-; CHECK: mov.u32 %ret0, %lanemask_ge;
+; CHECK: mov.u32 %r0, %lanemask_ge;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.ge()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_gt() {
-; CHECK: mov.u32 %ret0, %lanemask_gt;
+; CHECK: mov.u32 %r0, %lanemask_gt;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.gt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_clock() {
-; CHECK: mov.u32 %ret0, %clock;
+; CHECK: mov.u32 %r0, %clock;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.clock()
 	ret i32 %x
 }
 
 define ptx_device i64 @test_clock64() {
-; CHECK: mov.u64 %ret0, %clock64;
+; CHECK: mov.u64 %rl0, %clock64;
 ; CHECK: ret;
 	%x = call i64 @llvm.ptx.read.clock64()
 	ret i64 %x
 }
 
 define ptx_device i32 @test_pm0() {
-; CHECK: mov.u32 %ret0, %pm0;
+; CHECK: mov.u32 %r0, %pm0;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm0()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm1() {
-; CHECK: mov.u32 %ret0, %pm1;
+; CHECK: mov.u32 %r0, %pm1;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm1()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm2() {
-; CHECK: mov.u32 %ret0, %pm2;
+; CHECK: mov.u32 %r0, %pm2;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm2()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm3() {
-; CHECK: mov.u32 %ret0, %pm3;
+; CHECK: mov.u32 %r0, %pm3;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm3()
 	ret i32 %x
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
new file mode 100644
index 0000000..afab60c
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @test_fabsf(float %f) {
+; CHECK: abs.f32 %f0, %f0;
+; CHECK: ret;
+	%x = call float @llvm.fabs.f32(float %f)
+	ret float %x
+}
+
+define ptx_device double @test_fabs(double %d) {
+; CHECK: abs.f64 %fl0, %fl0;
+; CHECK: ret;
+	%x = call double @llvm.fabs.f64(double %d)
+	ret double %x
+}
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
new file mode 100644
index 0000000..d1f5093d
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -0,0 +1,173 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
+; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(1)* %ptr
+  ret i8 %a
+}
+
+define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(3)* %ptr
+  ret i8 %a
+}
+
+define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
+; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(5)* %ptr
+  ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
+; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(1)* %ptr
+  ret i16 %a
+}
+
+define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(3)* %ptr
+  ret i16 %a
+}
+
+define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
+; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(5)* %ptr
+  ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
+; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(1)* %ptr
+  ret i32 %a
+}
+
+define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(3)* %ptr
+  ret i32 %a
+}
+
+define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
+; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(5)* %ptr
+  ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
+; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(1)* %ptr
+  ret i64 %a
+}
+
+define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(3)* %ptr
+  ret i64 %a
+}
+
+define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
+; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(5)* %ptr
+  ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(1)* %ptr) {
+; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(1)* %ptr
+  ret float %a
+}
+
+define float @ld_shared_f32(float addrspace(3)* %ptr) {
+; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(3)* %ptr
+  ret float %a
+}
+
+define float @ld_local_f32(float addrspace(5)* %ptr) {
+; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(5)* %ptr
+  ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(1)* %ptr) {
+; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(1)* %ptr
+  ret double %a
+}
+
+define double @ld_shared_f64(double addrspace(3)* %ptr) {
+; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(3)* %ptr
+  ret double %a
+}
+
+define double @ld_local_f64(double addrspace(5)* %ptr) {
+; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(5)* %ptr
+  ret double %a
+}
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
new file mode 100644
index 0000000..81a5216
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(0)* %ptr
+  ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
+; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(0)* %ptr
+  ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
+; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(0)* %ptr
+  ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
+; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(0)* %ptr
+  ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(0)* %ptr) {
+; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(0)* %ptr
+  ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(0)* %ptr) {
+; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(0)* %ptr
+  ret double %a
+}
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg
index e748f7f..7180c84 100644
--- a/test/CodeGen/PTX/lit.local.cfg
+++ b/test/CodeGen/NVPTX/lit.local.cfg
@@ -1,6 +1,5 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
 targets = set(config.root.targets_to_build.split())
-if not 'PTX' in targets:
+if not 'NVPTX' in targets:
     config.unsupported = True
-
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
new file mode 100644
index 0000000..ab6f423
--- /dev/null
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+
+; CHECK: .func ({{.*}}) device_func
+define float @device_func(float %a) noinline {
+  %ret = fmul float %a, %a
+  ret float %ret
+}
+
+; CHECK: .entry kernel_func
+define void @kernel_func(float* %a) {
+  %val = load float* %a
+; CHECK: call.uni (retval0),
+; CHECK: device_func,
+  %mul = call float @device_func(float %val)
+  store float %mul, float* %a
+  ret void
+}
+
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
new file mode 100644
index 0000000..54e04ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
+; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
+; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
+; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(5)* %ptr
+  ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
+; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
+; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
+; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(5)* %ptr
+  ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
+; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
+; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
+; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(5)* %ptr
+  ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
+; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
+; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
+; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(5)* %ptr
+  ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
+; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
+; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
+; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(5)* %ptr
+  ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
+; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
+; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
+; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(5)* %ptr
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/st-generic.ll b/test/CodeGen/NVPTX/st-generic.ll
new file mode 100644
index 0000000..59a1fe0
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-generic.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
+; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(0)* %ptr
+  ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
+; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(0)* %ptr
+  ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
+; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(0)* %ptr
+  ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
+; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(0)* %ptr
+  ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
+; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(0)* %ptr
+  ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
+; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(0)* %ptr
+  ret void
+}
diff --git a/test/CodeGen/PTX/20110926-sitofp.ll b/test/CodeGen/PTX/20110926-sitofp.ll
deleted file mode 100644
index 38d35c5..0000000
--- a/test/CodeGen/PTX/20110926-sitofp.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-@A = common global [1536 x [1536 x float]] zeroinitializer, align 4
-@B = common global [1536 x [1536 x float]] zeroinitializer, align 4
-
-define internal ptx_device void @init_array(i32 %x, i32 %y) {
-  %arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y
-  %arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y
-  %mul5 = mul i32 %x, %y
-  %rem = srem i32 %mul5, 1024
-  %add = add nsw i32 %rem, 1
-; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}}
-  %conv = sitofp i32 %add to double
-  %div = fmul double %conv, 5.000000e-01
-  %conv7 = fptrunc double %div to float
-  store float %conv7, float* %arrayidx103, align 4
-  %rem14 = srem i32 %mul5, 1024
-  %add15 = add nsw i32 %rem14, 1
-  %conv16 = sitofp i32 %add15 to double
-  %div17 = fmul double %conv16, 5.000000e-01
-  %conv18 = fptrunc double %div17 to float
-  store float %conv18, float* %arrayidx224, align 4
-  ret void
-}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
deleted file mode 100644
index 8b10d11..0000000
--- a/test/CodeGen/PTX/add.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%z = add i16 %x, %y
-	ret i16 %z
-}
-
-define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%z = add i32 %x, %y
-	ret i32 %z
-}
-
-define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%z = add i64 %x, %y
-	ret i64 %z
-}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
-  %z = fadd float %x, %y
-  ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
-  %z = fadd double %x, %y
-  ret double %z
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1;
-; CHECK: ret;
-	%z = add i16 %x, 1
-	ret i16 %z
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1;
-; CHECK: ret;
-	%z = add i32 %x, 1
-	ret i32 %z
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1;
-; CHECK: ret;
-	%z = add i64 %x, 1
-	ret i64 %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000;
-; CHECK: ret;
-  %z = fadd float %x, 1.0
-  ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000;
-; CHECK: ret;
-  %z = fadd double %x, 1.0
-  ret double %z
-}
diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll
deleted file mode 100644
index 3fc0c40..0000000
--- a/test/CodeGen/PTX/aggregates.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-; XFAIL: *
-
-%complex = type { float, float }
-
-define ptx_device %complex @complex_add(%complex %a, %complex %b) {
-entry:
-; CHECK:      ld.param.f32	r[[R0:[0-9]+]], [__param_1];
-; CHECK-NEXT:	ld.param.f32	r[[R2:[0-9]+]], [__param_3];
-; CHECK-NEXT:	ld.param.f32	r[[R1:[0-9]+]], [__param_2];
-; CHECK-NEXT:	ld.param.f32	r[[R3:[0-9]+]], [__param_4];
-; CHECK-NEXT:	add.rn.f32	r[[R0]], r[[R0]], r[[R2]];
-; CHECK-NEXT:	add.rn.f32	r[[R1]], r[[R1]], r[[R3]];
-; CHECK-NEXT:	ret;
-  %a.real = extractvalue %complex %a, 0
-  %a.imag = extractvalue %complex %a, 1
-  %b.real = extractvalue %complex %b, 0
-  %b.imag = extractvalue %complex %b, 1
-  %ret.real = fadd float %a.real, %b.real
-  %ret.imag = fadd float %a.imag, %b.imag
-  %ret.0 = insertvalue %complex undef, float %ret.real, 0
-  %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
-  ret %complex %ret.1
-}
diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll
deleted file mode 100644
index 1403a23..0000000
--- a/test/CodeGen/PTX/bitwise.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; preds
-
-define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
-; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
-  %c = and i1 %x, %y
-  %d = zext i1 %c to i32 
-  ret i32 %d
-}
-
-define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
-; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
-  %a = or i1 %x, %y
-  %b = zext i1 %a to i32 
-  ret i32 %b
-}
-
-define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
-; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
-  %a = xor i1 %x, %y
-  %b = zext i1 %a to i32 
-  ret i32 %b
-}
diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll
deleted file mode 100644
index 464c29c..0000000
--- a/test/CodeGen/PTX/bra.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device void @test_bra_direct() {
-; CHECK: bra $L__BB0_1;
-entry:
-	br label %loop
-loop:
-	br label %loop
-}
-
-define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
-entry:
-; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]]
-	%p = icmp ugt i32 %x, %y
-; CHECK-NEXT: @%p0 bra
-; CHECK-NOT: bra
-	br i1 %p, label %clause.if, label %clause.else
-clause.if:
-; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]]
-	ret i32 %x
-clause.else:
-; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]]
-	ret i32 %y
-}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
deleted file mode 100644
index f55070a..0000000
--- a/test/CodeGen/PTX/cvt.ll
+++ /dev/null
@@ -1,290 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; preds
-; (note: we convert back to i32 to return)
-
-define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
-	%a = trunc i16 %x to i1
-	%b = and i1 %a, %y
-	%c = zext i1 %b to i32
-	ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
-	%a = trunc i32 %x to i1
-	%b = and i1 %a, %y
-	%c = zext i1 %b to i32
-	ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
-	%a = trunc i64 %x to i1
-	%b = and i1 %a, %y
-	%c = zext i1 %b to i32
-	ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
-	%a = fptoui float %x to i1
-	%b = and i1 %a, %y
-	%c = zext i1 %b to i32
-	ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
-	%a = fptoui double %x to i1
-	%b = and i1 %a, %y
-	%c = zext i1 %b to i32
-	ret i32 %c
-}
-
-; i16
-
-define ptx_device i16 @cvt_i16_preds(i1 %x) {
-; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i1 %x to i16
-	ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_i32(i32 %x) {
-; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%a = trunc i32 %x to i16
-	ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_i64(i64 %x) {
-; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%a = trunc i64 %x to i16
-	ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_f32(float %x) {
-; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui float %x to i16
-	ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_f64(double %x) {
-; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui double %x to i16
-	ret i16 %a
-}
-
-; i32
-
-define ptx_device i32 @cvt_i32_preds(i1 %x) {
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i1 %x to i32
-	ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_i16(i16 %x) {
-; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i16 %x to i32
-	ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_i64(i64 %x) {
-; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%a = trunc i64 %x to i32
-	ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_f32(float %x) {
-; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui float %x to i32
-	ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_f64(double %x) {
-; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui double %x to i32
-	ret i32 %a
-}
-
-; i64
-
-define ptx_device i64 @cvt_i64_preds(i1 %x) {
-; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i1 %x to i64
-	ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_i16(i16 %x) {
-; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i16 %x to i64
-	ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_i32(i32 %x) {
-; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%a = zext i32 %x to i64
-	ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_f32(float %x) {
-; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui float %x to i64
-	ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_f64(double %x) {
-; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fptoui double %x to i64
-	ret i64 %a
-}
-
-; f32
-
-define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: mov.b32 %f0, 0;
-; CHECK: mov.b32 %f1, 1065353216;
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i1 %x to float
-	ret float %a
-}
-
-define ptx_device float @cvt_f32_i16(i16 %x) {
-; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i16 %x to float
-	ret float %a
-}
-
-define ptx_device float @cvt_f32_i32(i32 %x) {
-; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i32 %x to float
-	ret float %a
-}
-
-define ptx_device float @cvt_f32_i64(i64 %x) {
-; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i64 %x to float
-	ret float %a
-}
-
-define ptx_device float @cvt_f32_f64(double %x) {
-; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fptrunc double %x to float
-	ret float %a
-}
-
-define ptx_device float @cvt_f32_s16(i16 %x) {
-; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i16 %x to float
-  ret float %a
-}
-
-define ptx_device float @cvt_f32_s32(i32 %x) {
-; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i32 %x to float
-  ret float %a
-}
-
-define ptx_device float @cvt_f32_s64(i64 %x) {
-; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i64 %x to float
-  ret float %a
-}
-
-; f64
-
-define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: mov.b64 %fd0, 0;
-; CHECK: mov.b64 %fd1, 4575657221408423936;
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i1 %x to double
-	ret double %a
-}
-
-define ptx_device double @cvt_f64_i16(i16 %x) {
-; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i16 %x to double
-	ret double %a
-}
-
-define ptx_device double @cvt_f64_i32(i32 %x) {
-; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i32 %x to double
-	ret double %a
-}
-
-define ptx_device double @cvt_f64_i64(i64 %x) {
-; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%a = uitofp i64 %x to double
-	ret double %a
-}
-
-define ptx_device double @cvt_f64_f32(float %x) {
-; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fpext float %x to double
-	ret double %a
-}
-
-define ptx_device double @cvt_f64_s16(i16 %x) {
-; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i16 %x to double
-  ret double %a
-}
-
-define ptx_device double @cvt_f64_s32(i32 %x) {
-; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i32 %x to double
-  ret double %a
-}
-
-define ptx_device double @cvt_f64_s64(i64 %x) {
-; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: ret
-  %a = sitofp i64 %x to double
-  ret double %a
-}
diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll
deleted file mode 100644
index 7816c80..0000000
--- a/test/CodeGen/PTX/exit.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_kernel void @t1() {
-; CHECK: exit;
-; CHECK-NOT: ret;
-  ret void
-}
-
-define ptx_kernel void @t2(i32* %p, i32 %x) {
-  store i32 %x, i32* %p
-; CHECK: exit;
-; CHECK-NOT: ret;
-  ret void
-}
diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll
deleted file mode 100644
index e1013be..0000000
--- a/test/CodeGen/PTX/fdiv-sm10.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fdiv float %x, %y
-	ret float %a
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fdiv double %x, %y
-	ret double %a
-}
diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll
deleted file mode 100644
index 1afa2eb..0000000
--- a/test/CodeGen/PTX/fdiv-sm13.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fdiv float %x, %y
-	ret float %a
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fdiv double %x, %y
-	ret double %a
-}
diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll
deleted file mode 100644
index 2b76e63..0000000
--- a/test/CodeGen/PTX/fneg.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x) {
-; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%y = fsub float -0.000000e+00, %x
-	ret float %y
-}
-
-define ptx_device double @t1_f64(double %x) {
-; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%y = fsub double -0.000000e+00, %x
-	ret double %y
-}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
deleted file mode 100644
index e55820d..0000000
--- a/test/CodeGen/PTX/ld.ll
+++ /dev/null
@@ -1,382 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;CHECK: .extern .global .b16 array_i16[10];
-@array_i16 = external global [10 x i16]
-
-;CHECK: .extern .const .b16 array_constant_i16[10];
-@array_constant_i16 = external addrspace(1) constant [10 x i16]
-
-;CHECK: .extern .shared .b16 array_shared_i16[10];
-@array_shared_i16 = external addrspace(4) global [10 x i16]
-
-;CHECK: .extern .global .b32 array_i32[10];
-@array_i32 = external global [10 x i32]
-
-;CHECK: .extern .const .b32 array_constant_i32[10];
-@array_constant_i32 = external addrspace(1) constant [10 x i32]
-
-;CHECK: .extern .shared .b32 array_shared_i32[10];
-@array_shared_i32 = external addrspace(4) global [10 x i32]
-
-;CHECK: .extern .global .b64 array_i64[10];
-@array_i64 = external global [10 x i64]
-
-;CHECK: .extern .const .b64 array_constant_i64[10];
-@array_constant_i64 = external addrspace(1) constant [10 x i64]
-
-;CHECK: .extern .shared .b64 array_shared_i64[10];
-@array_shared_i64 = external addrspace(4) global [10 x i64]
-
-;CHECK: .extern .global .b32 array_float[10];
-@array_float = external global [10 x float]
-
-;CHECK: .extern .const .b32 array_constant_float[10];
-@array_constant_float = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .shared .b32 array_shared_float[10];
-@array_shared_float = external addrspace(4) global [10 x float]
-
-;CHECK: .extern .global .b64 array_double[10];
-@array_double = external global [10 x double]
-
-;CHECK: .extern .const .b64 array_constant_double[10];
-@array_constant_double = external addrspace(1) constant [10 x double]
-
-;CHECK: .extern .shared .b64 array_shared_double[10];
-@array_shared_double = external addrspace(4) global [10 x double]
-
-
-define ptx_device i16 @t1_u16(i16* %p) {
-entry:
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
-  %x = load i16* %p
-  ret i16 %x
-}
-
-define ptx_device i32 @t1_u32(i32* %p) {
-entry:
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
-  %x = load i32* %p
-  ret i32 %x
-}
-
-define ptx_device i64 @t1_u64(i64* %p) {
-entry:
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
-  %x = load i64* %p
-  ret i64 %x
-}
-
-define ptx_device float @t1_f32(float* %p) {
-entry:
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
-  %x = load float* %p
-  ret float %x
-}
-
-define ptx_device double @t1_f64(double* %p) {
-entry:
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
-  %x = load double* %p
-  ret double %x
-}
-
-define ptx_device i16 @t2_u16(i16* %p) {
-entry:
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
-;CHECK: ret;
-  %i = getelementptr i16* %p, i32 1
-  %x = load i16* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t2_u32(i32* %p) {
-entry:
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
-;CHECK: ret;
-  %i = getelementptr i32* %p, i32 1
-  %x = load i32* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t2_u64(i64* %p) {
-entry:
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
-;CHECK: ret;
-  %i = getelementptr i64* %p, i32 1
-  %x = load i64* %i
-  ret i64 %x
-}
-
-define ptx_device float @t2_f32(float* %p) {
-entry:
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
-;CHECK: ret;
-  %i = getelementptr float* %p, i32 1
-  %x = load float* %i
-  ret float %x
-}
-
-define ptx_device double @t2_f64(double* %p) {
-entry:
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
-;CHECK: ret;
-  %i = getelementptr double* %p, i32 1
-  %x = load double* %i
-  ret double %x
-}
-
-define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-  %i = getelementptr i16* %p, i32 %q
-  %x = load i16* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-  %i = getelementptr i32* %p, i32 %q
-  %x = load i32* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-  %i = getelementptr i64* %p, i32 %q
-  %x = load i64* %i
-  ret i64 %x
-}
-
-define ptx_device float @t3_f32(float* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-  %i = getelementptr float* %p, i32 %q
-  %x = load float* %i
-  ret float %x
-}
-
-define ptx_device double @t3_f64(double* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-  %i = getelementptr double* %p, i32 %q
-  %x = load double* %i
-  ret double %x
-}
-
-define ptx_device i16 @t4_global_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
-  %x = load i16* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t4_global_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
-  %x = load i32* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t4_global_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
-  %x = load i64* %i
-  ret i64 %x
-}
-
-define ptx_device float @t4_global_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
-  %x = load float* %i
-  ret float %x
-}
-
-define ptx_device double @t4_global_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
-  %x = load double* %i
-  ret double %x
-}
-
-define ptx_device i16 @t4_const_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
-;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
-  %x = load i16 addrspace(1)* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t4_const_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
-;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
-  %x = load i32 addrspace(1)* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t4_const_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
-;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
-  %x = load i64 addrspace(1)* %i
-  ret i64 %x
-}
-
-define ptx_device float @t4_const_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
-;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
-  %x = load float addrspace(1)* %i
-  ret float %x
-}
-
-define ptx_device double @t4_const_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
-;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
-  %x = load double addrspace(1)* %i
-  ret double %x
-}
-
-define ptx_device i16 @t4_shared_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
-;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
-  %x = load i16 addrspace(4)* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t4_shared_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
-;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
-  %x = load i32 addrspace(4)* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t4_shared_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
-;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
-  %x = load i64 addrspace(4)* %i
-  ret i64 %x
-}
-
-define ptx_device float @t4_shared_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
-;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
-  %x = load float addrspace(4)* %i
-  ret float %x
-}
-
-define ptx_device double @t4_shared_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
-;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
-  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
-  %x = load double addrspace(4)* %i
-  ret double %x
-}
-
-define ptx_device i16 @t5_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
-;CHECK: ret;
-  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
-  %x = load i16* %i
-  ret i16 %x
-}
-
-define ptx_device i32 @t5_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
-;CHECK: ret;
-  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
-  %x = load i32* %i
-  ret i32 %x
-}
-
-define ptx_device i64 @t5_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
-;CHECK: ret;
-  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
-  %x = load i64* %i
-  ret i64 %x
-}
-
-define ptx_device float @t5_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
-;CHECK: ret;
-  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
-  %x = load float* %i
-  ret float %x
-}
-
-define ptx_device double @t5_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
-;CHECK: ret;
-  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
-  %x = load double* %i
-  ret double %x
-}
diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll
deleted file mode 100644
index e73ad25..0000000
--- a/test/CodeGen/PTX/llvm-intrinsic.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
-
-define ptx_device float @test_sqrt_f32(float %x) {
-entry:
-; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-  %y = call float @llvm.sqrt.f32(float %x)
-  ret float %y
-}
-
-define ptx_device double @test_sqrt_f64(double %x) {
-entry:
-; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-  %y = call double @llvm.sqrt.f64(double %x)
-  ret double %y
-}
-
-define ptx_device float @test_sin_f32(float %x) {
-entry:
-; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-  %y = call float @llvm.sin.f32(float %x)
-  ret float %y
-}
-
-define ptx_device double @test_sin_f64(double %x) {
-entry:
-; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-  %y = call double @llvm.sin.f64(double %x)
-  ret double %y
-}
-
-define ptx_device float @test_cos_f32(float %x) {
-entry:
-; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-  %y = call float @llvm.cos.f32(float %x)
-  ret float %y
-}
-
-define ptx_device double @test_cos_f64(double %x) {
-entry:
-; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-  %y = call double @llvm.cos.f64(double %x)
-  ret double %y
-}
-
-declare float  @llvm.sqrt.f32(float)
-declare double @llvm.sqrt.f64(double)
-declare float  @llvm.sin.f32(float)
-declare double @llvm.sin.f64(double)
-declare float  @llvm.cos.f32(float)
-declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
deleted file mode 100644
index 603c3ba..0000000
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
-
-define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
-entry:
-; FMA: mad.rn.f32
-; MUL: mul.rn.f32
-; MUL: add.rn.f32
-  %a = fmul float %x, %y
-  %b = fadd float %a, %z
-  ret float %b
-}
-
-define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
-entry:
-; FMA: mad.rn.f64
-; MUL: mul.rn.f64
-; MUL: add.rn.f64
-  %a = fmul double %x, %y
-  %b = fadd double %a, %z
-  ret double %b
-}
diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll
deleted file mode 100644
index cc28e3f..0000000
--- a/test/CodeGen/PTX/mad.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y, float %z) {
-; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
-	%a = fmul float %x, %y
-  %b = fadd float %a, %z
-	ret float %b
-}
-
-define ptx_device double @t1_f64(double %x, double %y, double %z) {
-; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
-	%a = fmul double %x, %y
-  %b = fadd double %a, %z
-	ret double %b
-}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
deleted file mode 100644
index 9e501be..0000000
--- a/test/CodeGen/PTX/mov.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16() {
-; CHECK: mov.u16 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
-	ret i16 0
-}
-
-define ptx_device i32 @t1_u32() {
-; CHECK: mov.u32 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
-	ret i32 0
-}
-
-define ptx_device i64 @t1_u64() {
-; CHECK: mov.u64 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
-	ret i64 0
-}
-
-define ptx_device float @t1_f32() {
-; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000;
-; CHECK: ret;
-	ret float 0.0
-}
-
-define ptx_device double @t1_f64() {
-; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000;
-; CHECK: ret;
-	ret double 0.0
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
-	ret i16 %x
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
-	ret i32 %x
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
-	ret i64 %x
-}
-
-define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
-	ret float %x
-}
-
-define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
-	ret double %x
-}
-
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
deleted file mode 100644
index 91949db..0000000
--- a/test/CodeGen/PTX/mul.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;define ptx_device i32 @t1(i32 %x, i32 %y) {
-;	%z = mul i32 %x, %y
-;	ret i32 %z
-;}
-
-;define ptx_device i32 @t2(i32 %x) {
-;	%z = mul i32 %x, 1
-;	ret i32 %z
-;}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
-  %z = fmul float %x, %y
-  ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
-  %z = fmul double %x, %y
-  ret double %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000;
-; CHECK: ret;
-  %z = fmul float %x, 5.0
-  ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000;
-; CHECK: ret;
-  %z = fmul double %x, 5.0
-  ret double %z
-}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
deleted file mode 100644
index 0fb6602..0000000
--- a/test/CodeGen/PTX/options.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
-; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
-; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
-; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3"
-; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
-; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
-; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32"
-; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64"
-
-define ptx_device void @t1() {
-	ret void
-}
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
deleted file mode 100644
index 377f173..0000000
--- a/test/CodeGen/PTX/parameter-order.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
-define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
-; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
-	%result = sub i32 %b, %c
-	ret i32 %result
-}
diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll
deleted file mode 100644
index f901b20..0000000
--- a/test/CodeGen/PTX/printf.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-
-@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
-
-define ptx_device void @t1_printf() {
-; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
-; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
-; CHECK: ret;
-    %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
-	ret void
-}
-
-@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
-
-define ptx_device void @t2_printf() {
-; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
-; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
-; CHECK: cvta.local.u64  %rd{{[0-9]+}}, __local{{[0-9+]}};
-; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
-; CHECK: ret;
-  %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
-  ret void
-}
diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll
deleted file mode 100644
index ba0523f..0000000
--- a/test/CodeGen/PTX/ret.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device void @t1() {
-; CHECK: ret;
-; CHECK-NOT: exit;
-	ret void
-}
diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll
deleted file mode 100644
index aa7ce85..0000000
--- a/test/CodeGen/PTX/selp.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
-; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}};
-	%a = select i1 %x, i32 %y, i32 %z
-	ret i32 %a
-}
-
-define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
-; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}};
-	%a = select i1 %x, i64 %y, i64 %z
-	ret i64 %a
-}
-
-define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}};
-	%a = select i1 %x, float %y, float %z
-	ret float %a
-}
-
-define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}};
-	%a = select i1 %x, double %y, double %z
-	ret double %a
-}
diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll
deleted file mode 100644
index 646abab..0000000
--- a/test/CodeGen/PTX/setp.ll
+++ /dev/null
@@ -1,206 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp eq i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ne i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ult i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ule i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ugt i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp uge i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp slt i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sle i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sgt i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sge i32 %x, %y
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp eq i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ne i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ult i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ule i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp ugt i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp uge i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp slt i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sle i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sgt i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
-	%p = icmp sge i32 %x, 1
-	%z = zext i1 %p to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]];
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
-; CHECK: ret;
-	%c = icmp eq i32 %x, %y
-	%d = icmp ugt i32 %u, %v
-	%e = and i1 %c, %d
-	%z = zext i1 %e to i32
-	ret i32 %z
-}
-
-define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]];
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
-; CHECK: ret;
-	%c = trunc i32 %w to i1
-	%d = icmp eq i32 %x, %y
-	%e = xor i1 %c, 1
-	%f = and i1 %d, %e
-	%z = zext i1 %f to i32
-	ret i32 %z
-}
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
deleted file mode 100644
index d9fe2cd..0000000
--- a/test/CodeGen/PTX/shl.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
-	%z = shl i32 %x, %y
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
-	%z = shl i32 %x, 3
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t3(i32 %x) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
-	%z = shl i32 3, %x
-; CHECK: ret;
-	ret i32 %z
-}
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
deleted file mode 100644
index eb4666f..0000000
--- a/test/CodeGen/PTX/shr.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
-	%z = lshr i32 %x, %y
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
-	%z = lshr i32 %x, 3
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t3(i32 %x) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
-	%z = lshr i32 3, %x
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t4(i32 %x, i32 %y) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
-	%z = ashr i32 %x, %y
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t5(i32 %x) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
-	%z = ashr i32 %x, 3
-; CHECK: ret;
-	ret i32 %z
-}
-
-define ptx_device i32 @t6(i32 %x) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}}
-	%z = ashr i32 -3, %x
-; CHECK: ret;
-	ret i32 %z
-}
diff --git a/test/CodeGen/PTX/simple-call.ll b/test/CodeGen/PTX/simple-call.ll
deleted file mode 100644
index 77ea29e..0000000
--- a/test/CodeGen/PTX/simple-call.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-
-define ptx_device void @test_add(float %x, float %y) {
-; CHECK: ret;
-	%z = fadd float %x, %y
-	ret void
-}
-
-define ptx_device float @test_call(float %x, float %y) {
-  %a = fadd float %x, %y
-; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
-  call void @test_add(float %a, float %y)
-  ret float %a
-}
-
-define ptx_device float @test_compute(float %x, float %y) {
-; CHECK: ret;
-  %z = fadd float %x, %y
-  ret float %z
-}
-
-define ptx_device float @test_call_compute(float %x, float %y) {
-; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}})
-  %z = call float @test_compute(float %x, float %y)
-  ret float %z
-}
-
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
deleted file mode 100644
index c794363..0000000
--- a/test/CodeGen/PTX/st.ll
+++ /dev/null
@@ -1,337 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;CHECK: .extern .global .b16 array_i16[10];
-@array_i16 = external global [10 x i16]
-
-;CHECK: .extern .const .b16 array_constant_i16[10];
-@array_constant_i16 = external addrspace(1) constant [10 x i16]
-
-;CHECK: .extern .shared .b16 array_shared_i16[10];
-@array_shared_i16 = external addrspace(4) global [10 x i16]
-
-;CHECK: .extern .global .b32 array_i32[10];
-@array_i32 = external global [10 x i32]
-
-;CHECK: .extern .const .b32 array_constant_i32[10];
-@array_constant_i32 = external addrspace(1) constant [10 x i32]
-
-;CHECK: .extern .shared .b32 array_shared_i32[10];
-@array_shared_i32 = external addrspace(4) global [10 x i32]
-
-;CHECK: .extern .global .b64 array_i64[10];
-@array_i64 = external global [10 x i64]
-
-;CHECK: .extern .const .b64 array_constant_i64[10];
-@array_constant_i64 = external addrspace(1) constant [10 x i64]
-
-;CHECK: .extern .shared .b64 array_shared_i64[10];
-@array_shared_i64 = external addrspace(4) global [10 x i64]
-
-;CHECK: .extern .global .b32 array_float[10];
-@array_float = external global [10 x float]
-
-;CHECK: .extern .const .b32 array_constant_float[10];
-@array_constant_float = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .shared .b32 array_shared_float[10];
-@array_shared_float = external addrspace(4) global [10 x float]
-
-;CHECK: .extern .global .b64 array_double[10];
-@array_double = external global [10 x double]
-
-;CHECK: .extern .const .b64 array_constant_double[10];
-@array_constant_double = external addrspace(1) constant [10 x double]
-
-;CHECK: .extern .shared .b64 array_shared_double[10];
-@array_shared_double = external addrspace(4) global [10 x double]
-
-
-define ptx_device void @t1_u16(i16* %p, i16 %x) {
-entry:
-;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
-;CHECK: ret;
-  store i16 %x, i16* %p
-  ret void
-}
-
-define ptx_device void @t1_u32(i32* %p, i32 %x) {
-entry:
-;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
-;CHECK: ret;
-  store i32 %x, i32* %p
-  ret void
-}
-
-define ptx_device void @t1_u64(i64* %p, i64 %x) {
-entry:
-;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
-;CHECK: ret;
-  store i64 %x, i64* %p
-  ret void
-}
-
-define ptx_device void @t1_f32(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
-;CHECK: ret;
-  store float %x, float* %p
-  ret void
-}
-
-define ptx_device void @t1_f64(double* %p, double %x) {
-entry:
-;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
-;CHECK: ret;
-  store double %x, double* %p
-  ret void
-}
-
-define ptx_device void @t2_u16(i16* %p, i16 %x) {
-entry:
-;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i16* %p, i32 1
-  store i16 %x, i16* %i
-  ret void
-}
-
-define ptx_device void @t2_u32(i32* %p, i32 %x) {
-entry:
-;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i32* %p, i32 1
-  store i32 %x, i32* %i
-  ret void
-}
-
-define ptx_device void @t2_u64(i64* %p, i64 %x) {
-entry:
-;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i64* %p, i32 1
-  store i64 %x, i64* %i
-  ret void
-}
-
-define ptx_device void @t2_f32(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr float* %p, i32 1
-  store float %x, float* %i
-  ret void
-}
-
-define ptx_device void @t2_f64(double* %p, double %x) {
-entry:
-;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr double* %p, i32 1
-  store double %x, double* %i
-  ret void
-}
-
-define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i16* %p, i32 %q
-  store i16 %x, i16* %i
-  ret void
-}
-
-define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i32* %p, i32 %q
-  store i32 %x, i32* %i
-  ret void
-}
-
-define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr i64* %p, i32 %q
-  store i64 %x, i64* %i
-  ret void
-}
-
-define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr float* %p, i32 %q
-  store float %x, float* %i
-  ret void
-}
-
-define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr double* %p, i32 %q
-  store double %x, double* %i
-  ret void
-}
-
-define ptx_device void @t4_global_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
-  store i16 %x, i16* %i
-  ret void
-}
-
-define ptx_device void @t4_global_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
-  store i32 %x, i32* %i
-  ret void
-}
-
-define ptx_device void @t4_global_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
-  store i64 %x, i64* %i
-  ret void
-}
-
-define ptx_device void @t4_global_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
-  store float %x, float* %i
-  ret void
-}
-
-define ptx_device void @t4_global_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
-  store double %x, double* %i
-  ret void
-}
-
-define ptx_device void @t4_shared_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
-;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
-  store i16 %x, i16 addrspace(4)* %i
-  ret void
-}
-
-define ptx_device void @t4_shared_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
-;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
-  store i32 %x, i32 addrspace(4)* %i
-  ret void
-}
-
-define ptx_device void @t4_shared_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
-;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
-  store i64 %x, i64 addrspace(4)* %i
-  ret void
-}
-
-define ptx_device void @t4_shared_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
-;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
-  store float %x, float addrspace(4)* %i
-  ret void
-}
-
-define ptx_device void @t4_shared_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
-;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
-  store double %x, double addrspace(4)* %i
-  ret void
-}
-
-define ptx_device void @t5_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
-  store i16 %x, i16* %i
-  ret void
-}
-
-define ptx_device void @t5_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
-  store i32 %x, i32* %i
-  ret void
-}
-
-define ptx_device void @t5_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
-  store i64 %x, i64* %i
-  ret void
-}
-
-define ptx_device void @t5_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
-  store float %x, float* %i
-  ret void
-}
-
-define ptx_device void @t5_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}};
-;CHECK: ret;
-  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
-  store double %x, double* %i
-  ret void
-}
diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll
deleted file mode 100644
index 65f8ee2..0000000
--- a/test/CodeGen/PTX/stack-object.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-
-define ptx_device float @stack1(float %a) {
-  ; CHECK: .local .align 4 .b8 __local0[4];
-  %a.2 = alloca float, align 4
-  ; CHECK: st.local.f32 [__local0], %f0
-  store float %a, float* %a.2
-  %a.3 = load float* %a.2
-  ret float %a.3
-}
-
-define ptx_device float @stack1_align8(float %a) {
-  ; CHECK: .local .align 8 .b8 __local0[4];
-  %a.2 = alloca float, align 8
-  ; CHECK: st.local.f32 [__local0], %f0
-  store float %a, float* %a.2
-  %a.3 = load float* %a.2
-  ret float %a.3
-}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
deleted file mode 100644
index 7ac886a..0000000
--- a/test/CodeGen/PTX/sub.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
-	%z = sub i16 %x, %y
-	ret i16 %z
-}
-
-define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
-	%z = sub i32 %x, %y
-	ret i32 %z
-}
-
-define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
-	%z = sub i64 %x, %y
-	ret i64 %z
-}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
-  %z = fsub float %x, %y
-  ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
-  %z = fsub double %x, %y
-  ret double %z
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1;
-; CHECK: ret;
-	%z = sub i16 %x, 1
-	ret i16 %z
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1;
-; CHECK: ret;
-	%z = sub i32 %x, 1
-	ret i32 %z
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1;
-; CHECK: ret;
-	%z = sub i64 %x, 1
-	ret i64 %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000;
-; CHECK: ret;
-  %z = fsub float %x, 1.0
-  ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000;
-; CHECK: ret;
-  %z = fsub double %x, 1.0
-  ret double %z
-}
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
index 5d1df46..4373660 100644
--- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -1,7 +1,7 @@
 ; This function should have exactly one call to fixdfdi, no more!
 
 ; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
-; RUN:    grep {bl .*fixdfdi} | count 1
+; RUN:    grep "bl .*fixdfdi" | count 1
 
 define double @test2(double %tmp.7705) {
         %mem_tmp.2.0.in = fptosi double %tmp.7705 to i64                ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index 97bb48e..aeb28af 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep {, f1}
+; RUN: llc < %s | not grep ", f1"
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index 969772e..7e84538 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN:   grep {vspltish v.*, 10}
+; RUN:   grep "vspltish v.*, 10"
 
 define void @test(<8 x i16>* %P) {
         %tmp = load <8 x i16>* %P               ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
index 86fd947..73736c5 100644
--- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "foo r3, r4"
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "bari r3, 47"
 
 ; PR1351
 
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index 3489477..53231b4 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=fast | FileCheck %s
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 | FileCheck %s
 ; The first argument of subfc must not be the same as any other register.
 
 ; CHECK: subfc [[REG:r.]],
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index c141551..382ba1f 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2
-; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2
-; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
-; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 1
+; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 1
+; Check that tail merging is the default on ppc, and that -enable-tail-merge works.
 
 ; ModuleID = 'tail.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index 72e93a9..b85792c 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -1,7 +1,7 @@
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "powerpc-apple-darwin8.8.0"
 
-; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; RUN: llc < %s -march=ppc32 | grep "rlwinm r3, r3, 23, 30, 30"
 ; PR1473
 
 define zeroext i8 @foo(i16 zeroext  %a)   {
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index 556a4a1c40..a60d11c 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -optimize-regalloc=0 -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index b3b9280..3d1a328 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -optimize-regalloc=0 -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index e03bd9e..e28a3e0 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast -optimize-regalloc=0
 
 define i32 @bork(i64 %foo, i64 %bar) {
 entry:
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 6a3c440..84aa40c 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 -disable-ppc-ilp-pref | FileCheck %s
 ; ModuleID = '<stdin>'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin10.0"
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index 6b31397..0003a17 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 | FileCheck %s
 ; ModuleID = 'nn.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin11.0"
@@ -9,7 +9,9 @@ target triple = "powerpc-apple-darwin11.0"
 
 define void @foo() nounwind ssp {
 entry:
-; CHECK: mtctr r12
+; Better: mtctr r12
+; CHECK: mr r12, [[REG:r[0-9]+]]
+; CHECK: mtctr [[REG]]
   %0 = load void (...)** @p, align 4              ; <void (...)*> [#uses=1]
   call void (...)* %0() nounwind
   br label %return
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index 6161b55..47d985c 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
 
 ; ModuleID = 'tsc.c'
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
index c2e1d6b..7b1c464 100644
--- a/test/CodeGen/PowerPC/Frames-leaf.ll
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -1,35 +1,35 @@
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   not grep {stw r31, 20(r1)}
+; RUN:   not grep "stw r31, 20(r1)"
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   not grep {stwu r1, -.*(r1)}
+; RUN:   not grep "stwu r1, -.*(r1)"
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   not grep {addi r1, r1, }
+; RUN:   not grep "addi r1, r1, "
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   not grep {lwz r31, 20(r1)}
+; RUN:   not grep "lwz r31, 20(r1)"
 ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN:   not grep {stw r31, 20(r1)}
+; RUN:   not grep "stw r31, 20(r1)"
 ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN:   not grep {stwu r1, -.*(r1)}
+; RUN:   not grep "stwu r1, -.*(r1)"
 ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN:   not grep {addi r1, r1, }
+; RUN:   not grep "addi r1, r1, "
 ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN:   not grep {lwz r31, 20(r1)}
+; RUN:   not grep "lwz r31, 20(r1)"
 ; RUN: llc < %s -march=ppc64 | \
-; RUN:   not grep {std r31, 40(r1)}
+; RUN:   not grep "std r31, 40(r1)"
 ; RUN: llc < %s -march=ppc64 | \
-; RUN:   not grep {stdu r1, -.*(r1)}
+; RUN:   not grep "stdu r1, -.*(r1)"
 ; RUN: llc < %s -march=ppc64 | \
-; RUN:   not grep {addi r1, r1, }
+; RUN:   not grep "addi r1, r1, "
 ; RUN: llc < %s -march=ppc64 | \
-; RUN:   not grep {ld r31, 40(r1)}
+; RUN:   not grep "ld r31, 40(r1)"
 ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN:   not grep {stw r31, 40(r1)}
+; RUN:   not grep "stw r31, 40(r1)"
 ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN:   not grep {stdu r1, -.*(r1)}
+; RUN:   not grep "stdu r1, -.*(r1)"
 ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN:   not grep {addi r1, r1, }
+; RUN:   not grep "addi r1, r1, "
 ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN:   not grep {ld r31, 40(r1)}
+; RUN:   not grep "ld r31, 40(r1)"
 
 define i32* @f1() {
         %tmp = alloca i32, i32 2                ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index ecd5ecd..0f6bd10 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,26 +1,26 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
-; RUN: not grep {stw r31, -4(r1)} %t1
-; RUN: grep {stwu r1, -16448(r1)} %t1
-; RUN: grep {addi r1, r1, 16448} %t1
+; RUN: not grep "stw r31, -4(r1)" %t1
+; RUN: grep "stwu r1, -16448(r1)" %t1
+; RUN: grep "addi r1, r1, 16448" %t1
 ; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {lwz r31, -4(r1)}
+; RUN: not grep "lwz r31, -4(r1)"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
 ; RUN:   -o %t2
-; RUN: grep {stw r31, -4(r1)} %t2
-; RUN: grep {stwu r1, -16448(r1)} %t2
-; RUN: grep {addi r1, r1, 16448} %t2
-; RUN: grep {lwz r31, -4(r1)} %t2
+; RUN: grep "stw r31, -4(r1)" %t2
+; RUN: grep "stwu r1, -16448(r1)" %t2
+; RUN: grep "addi r1, r1, 16448" %t2
+; RUN: grep "lwz r31, -4(r1)" %t2
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
-; RUN: not grep {std r31, -8(r1)} %t3
-; RUN: grep {stdu r1, -16496(r1)} %t3
-; RUN: grep {addi r1, r1, 16496} %t3
-; RUN: not grep {ld r31, -8(r1)} %t3
+; RUN: not grep "std r31, -8(r1)" %t3
+; RUN: grep "stdu r1, -16496(r1)" %t3
+; RUN: grep "addi r1, r1, 16496" %t3
+; RUN: not grep "ld r31, -8(r1)" %t3
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
 ; RUN:   -o %t4
-; RUN: grep {std r31, -8(r1)} %t4
-; RUN: grep {stdu r1, -16512(r1)} %t4
-; RUN: grep {addi r1, r1, 16512} %t4
-; RUN: grep {ld r31, -8(r1)} %t4
+; RUN: grep "std r31, -8(r1)" %t4
+; RUN: grep "stdu r1, -16512(r1)" %t4
+; RUN: grep "addi r1, r1, 16512" %t4
+; RUN: grep "ld r31, -8(r1)" %t4
 
 define i32* @f1() {
         %tmp = alloca i32, i32 4095             ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 7b0d69c..6f985c8 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r4, 32751}
+; RUN:   grep "stw r4, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r4, 32751}
+; RUN:   grep "stw r4, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {std r4, 9024}
+; RUN:   grep "std r4, 9024"
 
 define void @test() nounwind {
 	store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
index 932ad7a..de3aa7c 100644
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -fp-contract=fast | FileCheck %s
 
 %0 = type { double, double }
 
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
index 64a45e5..6fd484b 100644
--- a/test/CodeGen/PowerPC/and-imm.ll
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis}
+; RUN: llc < %s -march=ppc32 | not grep "ori\|lis"
 
 ; andi. r3, r3, 32769	
 define i32 @test(i32 %X) {
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
index 009f468..898ad7c 100644
--- a/test/CodeGen/PowerPC/big-endian-actual-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {addc 4, 4, 6}
+; RUN:   grep "addc 4, 4, 6"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {adde 3, 3, 5}
+; RUN:   grep "adde 3, 3, 5"
 
 define i64 @foo(i64 %x, i64 %y) {
   %z = add i64 %x, %y
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
index fe85404..760833c 100644
--- a/test/CodeGen/PowerPC/big-endian-call-result.ll
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {addic 4, 4, 1}
+; RUN:   grep "addic 4, 4, 1"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {addze 3, 3}
+; RUN:   grep "addze 3, 3"
 
 declare i64 @foo()
 
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index cc02e40..dda1538 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   grep {b LBB.*} | count 4
+; RUN:   grep "b LBB.*" | count 4
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 29bcb20..dcdda57 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,11 +1,11 @@
 ; Test various forms of calls.
 
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   grep {bl } | count 1
+; RUN:   grep "bl " | count 1
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   grep {bctrl} | count 1
+; RUN:   grep "bctrl" | count 1
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   grep {bla } | count 1
+; RUN:   grep "bla " | count 1
 
 declare void @foo()
 
diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll
new file mode 100644
index 0000000..cc80f83
--- /dev/null
+++ b/test/CodeGen/PowerPC/coalesce-ext.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=ppc64 -mtriple=powerpc64-apple-darwin < %s | FileCheck %s
+; Check that the peephole optimizer knows about sext and zext instructions.
+; CHECK: test1sext
+define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+  %C = add i64 %A, %B
+  ; CHECK: add [[SUM:r[0-9]+]], r3, r4
+  %D = trunc i64 %C to i32
+  %E = shl i64 %C, 32
+  %F = ashr i64 %E, 32
+  ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]]
+  store volatile i64 %F, i64 *%P2
+  ; CHECK: std [[EXT]]
+  store volatile i32 %D, i32* %P
+  ; Reuse low bits of extended register, don't extend live range of SUM.
+  ; CHECK: stw [[EXT]]
+  ret i32 %D
+}
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index 92d1dbe..94c5c02 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {cmpwi cr0, r3, -1}
+; RUN:   grep "cmpwi cr0, r3, -1"
 
 define i32 @test(i32 %x) nounwind {
         %c = icmp eq i32 %x, -1
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
index 8901e02..9efca91 100644
--- a/test/CodeGen/PowerPC/constants.ll
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep ori | count 3
 ; RUN: llc < %s -march=ppc32 | \
-; RUN:   grep {li } | count 4
+; RUN:   grep "li " | count 4
 
 define i32 @f1() {
 entry:
diff --git a/test/CodeGen/PowerPC/ctrloop-reg.ll b/test/CodeGen/PowerPC/ctrloop-reg.ll
new file mode 100644
index 0000000..874e571
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-reg.ll
@@ -0,0 +1,87 @@
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211 = type { %union.v.0.48.90.114.120.138.144.150.156.162.168.174.180.210, i16, i16 }
+%union.v.0.48.90.114.120.138.144.150.156.162.168.174.180.210 = type { i64 }
+%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215 = type { i8*, i8*, i8*, i32, i8, i8, i64, %struct.stream_procs.2.50.92.116.122.140.146.152.158.164.170.176.182.212, i32, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, %struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i16, i32 }
+%struct.stream_procs.2.50.92.116.122.140.146.152.158.164.170.176.182.212 = type { i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i8)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i64*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i64)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)* }
+%struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213*, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213 = type { %struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213*, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, i32 }
+
+@special_ops = external global [7 x i32 (%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)*], align 8
+@ostack = external global [520 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@osbot = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@osp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@ostop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@osp_nargs = external global [6 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*], align 8
+@estack = external global [150 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@esp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@estop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@dstack = external global [20 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@dsp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@dstop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@name_errordict = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211
+@name_ErrorNames = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211
+@error_object = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211, align 8
+
+declare i32 @zadd(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zdup(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zexch(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zifelse(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zle(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zpop(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zsub(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare void @interp_init(i32) nounwind
+
+declare void @interp_fix_op(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* nocapture) nounwind
+
+define i32 @interpret(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* %pref, i32 %user_errors) nounwind {
+entry:
+  %erref = alloca %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211, align 8
+  br i1 undef, label %retry.us, label %retry
+
+retry.us:                                         ; preds = %if.end18, %retry, %retry, %retry, %retry, %entry
+  ret i32 undef
+
+retry:                                            ; preds = %if.end18, %entry
+  %0 = phi %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* [ null, %entry ], [ %erref, %if.end18 ]
+  %call = call i32 @interp(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* %0)
+  switch i32 %call, label %if.end18 [
+    i32 -3, label %retry.us
+    i32 -5, label %retry.us
+    i32 -16, label %retry.us
+    i32 -25, label %retry.us
+  ]
+
+if.end18:                                         ; preds = %retry
+  br i1 false, label %retry.us, label %retry
+}
+
+; CHECK: @interpret
+
+declare i32 @interp_exit(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* nocapture) nounwind readnone
+
+declare i32 @interp(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*) nounwind
+
+declare i32 @dict_lookup(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211**)
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @obj_compare(...)
+
+declare i32 @file_check_read(...)
+
+declare i32 @scan_token(...)
+
+declare i32 @file_close(...)
+
+declare void @sread_string(%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i8*, i32)
diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll
new file mode 100644
index 0000000..dcea06f
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-s000.ll
@@ -0,0 +1,156 @@
+; ModuleID = 'tsc_s000.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@Y = common global [16000 x double] zeroinitializer, align 32
+@X = common global [16000 x double] zeroinitializer, align 32
+@Z = common global [16000 x double] zeroinitializer, align 32
+@U = common global [16000 x double] zeroinitializer, align 32
+@V = common global [16000 x double] zeroinitializer, align 32
+@aa = common global [256 x [256 x double]] zeroinitializer, align 32
+@bb = common global [256 x [256 x double]] zeroinitializer, align 32
+@cc = common global [256 x [256 x double]] zeroinitializer, align 32
+@array = common global [65536 x double] zeroinitializer, align 32
+@x = common global [16000 x double] zeroinitializer, align 32
+@temp = common global double 0.000000e+00, align 8
+@temp_int = common global i32 0, align 4
+@a = common global [16000 x double] zeroinitializer, align 32
+@b = common global [16000 x double] zeroinitializer, align 32
+@c = common global [16000 x double] zeroinitializer, align 32
+@d = common global [16000 x double] zeroinitializer, align 32
+@e = common global [16000 x double] zeroinitializer, align 32
+@tt = common global [256 x [256 x double]] zeroinitializer, align 32
+@indx = common global [16000 x i32] zeroinitializer, align 32
+@xx = common global double* null, align 8
+@yy = common global double* null, align 8
+
+define i32 @s000() nounwind {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.end, %entry
+  %nl.010 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
+  %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 32, !tbaa !0
+  %add = fadd double %0, 1.000000e+00
+  %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+  store double %add, double* %arrayidx5, align 32, !tbaa !0
+  %indvars.iv.next11 = or i64 %indvars.iv, 1
+  %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
+  %1 = load double* %arrayidx.1, align 8, !tbaa !0
+  %add.1 = fadd double %1, 1.000000e+00
+  %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
+  store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0
+  %indvars.iv.next.112 = or i64 %indvars.iv, 2
+  %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
+  %2 = load double* %arrayidx.2, align 16, !tbaa !0
+  %add.2 = fadd double %2, 1.000000e+00
+  %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
+  store double %add.2, double* %arrayidx5.2, align 16, !tbaa !0
+  %indvars.iv.next.213 = or i64 %indvars.iv, 3
+  %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
+  %3 = load double* %arrayidx.3, align 8, !tbaa !0
+  %add.3 = fadd double %3, 1.000000e+00
+  %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
+  store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0
+  %indvars.iv.next.314 = or i64 %indvars.iv, 4
+  %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
+  %4 = load double* %arrayidx.4, align 32, !tbaa !0
+  %add.4 = fadd double %4, 1.000000e+00
+  %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
+  store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0
+  %indvars.iv.next.415 = or i64 %indvars.iv, 5
+  %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
+  %5 = load double* %arrayidx.5, align 8, !tbaa !0
+  %add.5 = fadd double %5, 1.000000e+00
+  %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
+  store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0
+  %indvars.iv.next.516 = or i64 %indvars.iv, 6
+  %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
+  %6 = load double* %arrayidx.6, align 16, !tbaa !0
+  %add.6 = fadd double %6, 1.000000e+00
+  %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
+  store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0
+  %indvars.iv.next.617 = or i64 %indvars.iv, 7
+  %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
+  %7 = load double* %arrayidx.7, align 8, !tbaa !0
+  %add.7 = fadd double %7, 1.000000e+00
+  %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
+  store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0
+  %indvars.iv.next.718 = or i64 %indvars.iv, 8
+  %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
+  %8 = load double* %arrayidx.8, align 32, !tbaa !0
+  %add.8 = fadd double %8, 1.000000e+00
+  %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
+  store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0
+  %indvars.iv.next.819 = or i64 %indvars.iv, 9
+  %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
+  %9 = load double* %arrayidx.9, align 8, !tbaa !0
+  %add.9 = fadd double %9, 1.000000e+00
+  %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
+  store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0
+  %indvars.iv.next.920 = or i64 %indvars.iv, 10
+  %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
+  %10 = load double* %arrayidx.10, align 16, !tbaa !0
+  %add.10 = fadd double %10, 1.000000e+00
+  %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
+  store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0
+  %indvars.iv.next.1021 = or i64 %indvars.iv, 11
+  %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
+  %11 = load double* %arrayidx.11, align 8, !tbaa !0
+  %add.11 = fadd double %11, 1.000000e+00
+  %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
+  store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0
+  %indvars.iv.next.1122 = or i64 %indvars.iv, 12
+  %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
+  %12 = load double* %arrayidx.12, align 32, !tbaa !0
+  %add.12 = fadd double %12, 1.000000e+00
+  %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
+  store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0
+  %indvars.iv.next.1223 = or i64 %indvars.iv, 13
+  %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
+  %13 = load double* %arrayidx.13, align 8, !tbaa !0
+  %add.13 = fadd double %13, 1.000000e+00
+  %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
+  store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0
+  %indvars.iv.next.1324 = or i64 %indvars.iv, 14
+  %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
+  %14 = load double* %arrayidx.14, align 16, !tbaa !0
+  %add.14 = fadd double %14, 1.000000e+00
+  %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
+  store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0
+  %indvars.iv.next.1425 = or i64 %indvars.iv, 15
+  %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
+  %15 = load double* %arrayidx.15, align 8, !tbaa !0
+  %add.15 = fadd double %15, 1.000000e+00
+  %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
+  store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0
+  %indvars.iv.next.15 = add i64 %indvars.iv, 16
+  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
+  br i1 %exitcond.15, label %for.end, label %for.body3
+
+for.end:                                          ; preds = %for.body3
+  %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+  %inc7 = add nsw i32 %nl.010, 1
+  %exitcond = icmp eq i32 %inc7, 400000
+  br i1 %exitcond, label %for.end8, label %for.cond1.preheader
+
+for.end8:                                         ; preds = %for.end
+  ret i32 0
+
+; CHECK: @s000
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll
new file mode 100644
index 0000000..eae8c38
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -0,0 +1,134 @@
+; ModuleID = 'SingleSource/Regression/C/sumarray2d.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@.str = private unnamed_addr constant [23 x i8] c"Sum(Array[%d,%d] = %d\0A\00", align 1
+
+define i32 @SumArray([100 x i32]* nocapture %Array, i32 %NumI, i32 %NumJ) nounwind readonly {
+entry:
+  %cmp12 = icmp eq i32 %NumI, 0
+  br i1 %cmp12, label %for.end8, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp29 = icmp eq i32 %NumJ, 0
+  br i1 %cmp29, label %for.inc6, label %for.body3.lr.ph.us
+
+for.inc6.us:                                      ; preds = %for.body3.us
+  %indvars.iv.next17 = add i64 %indvars.iv16, 1
+  %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+  %exitcond19 = icmp eq i32 %lftr.wideiv18, %NumI
+  br i1 %exitcond19, label %for.end8, label %for.body3.lr.ph.us
+
+for.body3.us:                                     ; preds = %for.body3.us, %for.body3.lr.ph.us
+  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
+  %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
+  %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
+  %0 = load i32* %arrayidx5.us, align 4, !tbaa !0
+  %add.us = add nsw i32 %0, %Result.111.us
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %NumJ
+  br i1 %exitcond, label %for.inc6.us, label %for.body3.us
+
+for.body3.lr.ph.us:                               ; preds = %for.inc6.us, %for.cond1.preheader.lr.ph
+  %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
+  %Result.014.us = phi i32 [ %add.us, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
+  br label %for.body3.us
+
+for.inc6:                                         ; preds = %for.inc6, %for.cond1.preheader.lr.ph
+  %i.013 = phi i32 [ %inc7, %for.inc6 ], [ 0, %for.cond1.preheader.lr.ph ]
+  %inc7 = add i32 %i.013, 1
+  %exitcond20 = icmp eq i32 %inc7, %NumI
+  br i1 %exitcond20, label %for.end8, label %for.inc6
+
+for.end8:                                         ; preds = %for.inc6.us, %for.inc6, %entry
+  %Result.0.lcssa = phi i32 [ 0, %entry ], [ %add.us, %for.inc6.us ], [ 0, %for.inc6 ]
+  ret i32 %Result.0.lcssa
+; CHECK: @SumArray
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+define i32 @main() nounwind {
+entry:
+  %Array = alloca [100 x [100 x i32]], align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ]
+  %0 = trunc i64 %indvars.iv33 to i32
+  %sub = sub i32 0, %0
+  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
+  store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next34 = add i64 %indvars.iv33, 1
+  %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
+  %exitcond36 = icmp eq i32 %lftr.wideiv35, 100
+  br i1 %exitcond36, label %for.cond6.preheader, label %for.body
+
+for.cond6.preheader:                              ; preds = %for.body, %for.inc17
+  %indvars.iv29 = phi i64 [ %indvars.iv.next30, %for.inc17 ], [ 0, %for.body ]
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.inc14, %for.cond6.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond6.preheader ], [ %indvars.iv.next, %for.inc14 ]
+  %1 = trunc i64 %indvars.iv to i32
+  %2 = trunc i64 %indvars.iv29 to i32
+  %cmp9 = icmp eq i32 %1, %2
+  br i1 %cmp9, label %for.inc14, label %if.then
+
+if.then:                                          ; preds = %for.body8
+  %3 = add i64 %indvars.iv, %indvars.iv29
+  %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
+  %4 = trunc i64 %3 to i32
+  store i32 %4, i32* %arrayidx13, align 4, !tbaa !0
+  br label %for.inc14
+
+for.inc14:                                        ; preds = %for.body8, %if.then
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv27 = trunc i64 %indvars.iv.next to i32
+  %exitcond28 = icmp eq i32 %lftr.wideiv27, 100
+  br i1 %exitcond28, label %for.inc17, label %for.body8
+
+for.inc17:                                        ; preds = %for.inc14
+  %indvars.iv.next30 = add i64 %indvars.iv29, 1
+  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
+  %exitcond32 = icmp eq i32 %lftr.wideiv31, 100
+  br i1 %exitcond32, label %for.body3.lr.ph.us.i, label %for.cond6.preheader
+
+for.inc6.us.i:                                    ; preds = %for.body3.us.i
+  %indvars.iv.next17.i = add i64 %indvars.iv16.i, 1
+  %lftr.wideiv24 = trunc i64 %indvars.iv.next17.i to i32
+  %exitcond25 = icmp eq i32 %lftr.wideiv24, 100
+  br i1 %exitcond25, label %SumArray.exit, label %for.body3.lr.ph.us.i
+
+for.body3.us.i:                                   ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i
+  %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
+  %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
+  %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
+  %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0
+  %add.us.i = add nsw i32 %5, %Result.111.us.i
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %for.inc6.us.i, label %for.body3.us.i
+
+for.body3.lr.ph.us.i:                             ; preds = %for.inc17, %for.inc6.us.i
+  %indvars.iv16.i = phi i64 [ %indvars.iv.next17.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
+  %Result.014.us.i = phi i32 [ %add.us.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
+  br label %for.body3.us.i
+
+SumArray.exit:                                    ; preds = %for.inc6.us.i
+  %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
+  ret i32 0
+
+; CHECK: @main
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
new file mode 100644
index 0000000..4b6f7b9
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -0,0 +1,79 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@a = common global i32 0, align 4
+
+define void @test1(i32 %c) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %add = add nsw i32 %0, %c
+  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test1
+; CHECK-NOT: or 3, 3, 3
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+define void @test2(i32 %c, i32 %d) nounwind {
+entry:
+  %cmp1 = icmp sgt i32 %d, 0
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %add = add nsw i32 %0, %c
+  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %d
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test2
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+define void @test3(i32 %c, i32 %d) nounwind {
+entry:
+  %cmp1 = icmp sgt i32 %d, 0
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %mul = mul nsw i32 %i.02, %c
+  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %add = add nsw i32 %0, %mul
+  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %d
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test3
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
index af23369..56f7782 100644
--- a/test/CodeGen/PowerPC/darwin-labels.ll
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {foo bar":}
+; RUN: llc < %s | grep 'foo bar":'
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index 6ef740f..ddcce745 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1"
 
 define double @fabs(double %f) {
 entry:
-	%tmp2 = tail call double @fabs( double %f )		; <double> [#uses=1]
+	%tmp2 = tail call double @fabs( double %f ) readnone	; <double> [#uses=1]
 	ret double %tmp2
 }
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 815c72c..27496f7 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 | \
-; RUN:   egrep {fn?madd|fn?msub} | count 8
+; RUN: llc < %s -march=ppc32 -fp-contract=fast | \
+; RUN:   egrep "fn?madd|fn?msub" | count 8
 
 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index bbd5c71..9fa2dcb 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -3,7 +3,7 @@
 declare double @fabs(double)
 
 define double @test(double %X) {
-        %Y = call double @fabs( double %X )             ; <double> [#uses=1]
+        %Y = call double @fabs( double %X ) readnone     ; <double> [#uses=1]
         %Z = fsub double -0.000000e+00, %Y               ; <double> [#uses=1]
         ret double %Z
 }
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index 74a8725..bf8c4a2 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -2,13 +2,13 @@
 ; otherwise.
 
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
-; RUN:   grep {fsqrt f1, f1}
+; RUN:   grep "fsqrt f1, f1"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN:   grep {fsqrt f1, f1}
+; RUN:   grep "fsqrt f1, f1"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
-; RUN:   not grep {fsqrt f1, f1}
+; RUN:   not grep "fsqrt f1, f1"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
-; RUN:   not grep {fsqrt f1, f1}
+; RUN:   not grep "fsqrt f1, f1"
 
 declare double @llvm.sqrt.f64(double)
 
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index a43f09c..7d089bb 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -stats |& \
-; RUN:   grep {4 .*Number of machine instrs printed}
+; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
+; RUN:   grep "4 .*Number of machine instrs printed"
 
 ;; Integer absolute value, should produce something as good as:
 ;;      srawi r2, r3, 31
diff --git a/test/CodeGen/PowerPC/isel.ll b/test/CodeGen/PowerPC/isel.ll
new file mode 100644
index 0000000..ed494c5
--- /dev/null
+++ b/test/CodeGen/PowerPC/isel.ll
@@ -0,0 +1,23 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+	%p = icmp uge i64 %a, %b
+	%x = select i1 %p, i64 %c, i64 %d
+	ret i64 %x
+; CHECK: @test1
+; CHECK: isel
+}
+
+define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+	%p = icmp uge i32 %a, %b
+	%x = select i1 %p, i32 %c, i32 %d
+	ret i32 %x
+; CHECK: @test2
+; CHECK: isel
+}
+
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
index 4161e34..78cdf4a 100644
--- a/test/CodeGen/PowerPC/ispositive.ll
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {srwi r3, r3, 31}
+; RUN:   grep "srwi r3, r3, 31"
 
 define i32 @test1(i32 %X) {
 entry:
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
new file mode 100644
index 0000000..12f1d1f
--- /dev/null
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -0,0 +1,49 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
+entry:
+  %0 = load i8** undef, align 8, !tbaa !0
+  br i1 undef, label %return, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  br i1 undef, label %if.end7, label %return
+
+if.end7:                                          ; preds = %lor.lhs.false
+  br i1 undef, label %if.then15, label %if.end71
+
+if.then15:                                        ; preds = %if.end7
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %if.then15
+  %idxprom17 = sext i32 0 to i64
+  %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17
+  %or = or i32 undef, undef
+  br i1 %cond1, label %if.end71, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  br i1 %cond2, label %while.cond, label %if.then45
+
+if.then45:                                        ; preds = %while.body
+  %idxprom48139 = zext i32 %or to i64
+  %arrayidx49 = getelementptr inbounds i8* %0, i64 %idxprom48139
+  %1 = bitcast i8* %arrayidx49 to i16*
+  %2 = bitcast i8* %arrayidx18 to i16*
+  %3 = load i16* %1, align 1
+  store i16 %3, i16* %2, align 1
+  br label %return
+
+if.end71:                                         ; preds = %while.cond, %if.end7
+  unreachable
+
+return:                                           ; preds = %if.then45, %lor.lhs.false, %entry
+  ret void
+
+; CHECK: @allocateSpace
+; CHECK: lbzux
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 94c2526..915595f 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 | grep cntlzw 
 ; RUN: llc < %s -march=ppc32 | not grep xori 
-; RUN: llc < %s -march=ppc32 | not grep {li }
-; RUN: llc < %s -march=ppc32 | not grep {mr }
+; RUN: llc < %s -march=ppc32 | not grep "li "
+; RUN: llc < %s -march=ppc32 | not grep "mr "
 
 define i1 @test(i64 %x) {
   %tmp = icmp ult i64 %x, 4294967296
diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
index f441e42..42472c5 100644
--- a/test/CodeGen/PowerPC/lsr-postinc-pos.ll
+++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -print-lsr-output |& FileCheck %s
+; RUN: llc < %s -print-lsr-output 2>&1 | FileCheck %s
 
 ; The icmp is a post-inc use, and the increment is in %bb11, but the
 ; scevgep needs to be inserted in %bb so that it is dominated by %t.
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index 17e7e28..39af11a 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep addi
-; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   not grep addi
 
 @Glob = global i64 4
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll
index 3459413..6320e28 100644
--- a/test/CodeGen/PowerPC/no-dead-strip.ll
+++ b/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {no_dead_strip.*_X}
+; RUN: llc < %s | grep "no_dead_strip.*_X"
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
index 1fad2fa..77b726c 100644
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -fp-contract=fast | FileCheck %s
 
 %0 = type { double, double }
 
diff --git a/test/CodeGen/PowerPC/ppc64-cyclecounter.ll b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll
new file mode 100644
index 0000000..38406ca
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i64 @test1() nounwind {
+entry:
+  %r = call i64 @llvm.readcyclecounter()
+  ret i64 %r
+}
+
+; CHECK: @test1
+; CHECK: mfspr 3, 268
+
+declare i64 @llvm.readcyclecounter()
+
diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll
index cf16b4c..c931dfe 100644
--- a/test/CodeGen/PowerPC/retaddr.ll
+++ b/test/CodeGen/PowerPC/retaddr.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ppc32 | grep mflr
 ; RUN: llc < %s -march=ppc32 | grep lwz
-; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)}
+; RUN: llc < %s -march=ppc64 | grep "ld r., 16(r1)"
 
 target triple = "powerpc-apple-darwin8"
 
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index 6410c63..3f90008 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 | grep rlwimi
-; RUN: llc < %s -march=ppc32 | not grep {or }
+; RUN: llc < %s -march=ppc32 | not grep "or "
 
 ; Make sure there is no register-register copies here.
 
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index 05d37bf..7efdbe9 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -stats |& \
-; RUN:   grep {Number of machine instrs printed} | grep 12
+; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
+; RUN:   grep "Number of machine instrs printed" | grep 12
 
 define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
 	%tmp1 = shl i32 %srcA, 15		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 688b29a..7319583 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {srwi r., r., 5}
+; RUN:   grep "srwi r., r., 5"
 
 define i32 @eq0(i32 %a) {
         %tmp.1 = icmp eq i32 %a, 0              ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index b4767b0..19ca098 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
+; RUN: llc < %s -march=ppc32 | not grep "extsh\|rlwinm"
 
 declare signext i16 @foo()  
 
diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll
index 2020361..810630f6 100644
--- a/test/CodeGen/PowerPC/stack-protector.ll
+++ b/test/CodeGen/PowerPC/stack-protector.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_guard}
-; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_fail}
+; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_guard"
+; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_fail"
 
 @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"		; <[11 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/PowerPC/stwu-gta.ll b/test/CodeGen/PowerPC/stwu-gta.ll
new file mode 100644
index 0000000..4febe7e
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwu-gta.ll
@@ -0,0 +1,22 @@
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux"
+; RUN: llc < %s | FileCheck %s
+
+%class.Two.0.5 = type { i32, i32, i32 }
+
+@foo = external global %class.Two.0.5, align 4
+
+define void @_GLOBAL__I_a() nounwind section ".text.startup" {
+entry:
+  store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4, !tbaa !0
+  store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4, !tbaa !0
+  ret void
+}
+
+; CHECK: @_GLOBAL__I_a
+; CHECK-NOT: stwux
+; CHECK: stwu
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
new file mode 100644
index 0000000..897bfc6
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%class.spell_checker.21.103.513.538 = type { %"class.std::map.20.102.512.537" }
+%"class.std::map.20.102.512.537" = type { %"class.std::_Rb_tree.19.101.511.536" }
+%"class.std::_Rb_tree.19.101.511.536" = type { %"struct.std::_Rb_tree<std::pair<const char *, const char *>, std::pair<const std::pair<const char *, const char *>, int>, std::_Select1st<std::pair<const std::pair<const char *, const char *>, int>>, std::less<std::pair<const char *, const char *>>, std::allocator<std::pair<const std::pair<const char *, const char *>, int>> >::_Rb_tree_impl.18.100.510.535" }
+%"struct.std::_Rb_tree<std::pair<const char *, const char *>, std::pair<const std::pair<const char *, const char *>, int>, std::_Select1st<std::pair<const std::pair<const char *, const char *>, int>>, std::less<std::pair<const char *, const char *>>, std::allocator<std::pair<const std::pair<const char *, const char *>, int>> >::_Rb_tree_impl.18.100.510.535" = type { %"struct.std::less.16.98.508.533", %"struct.std::_Rb_tree_node_base.17.99.509.534", i64 }
+%"struct.std::less.16.98.508.533" = type { i8 }
+%"struct.std::_Rb_tree_node_base.17.99.509.534" = type { i32, %"struct.std::_Rb_tree_node_base.17.99.509.534"*, %"struct.std::_Rb_tree_node_base.17.99.509.534"*, %"struct.std::_Rb_tree_node_base.17.99.509.534"* }
+
+define void @test1(%class.spell_checker.21.103.513.538* %this) unnamed_addr align 2 {
+entry:
+  %_M_header.i.i.i.i.i.i = getelementptr inbounds %class.spell_checker.21.103.513.538* %this, i64 0, i32 0, i32 0, i32 0, i32 1
+  %0 = bitcast %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 4, i1 false) nounwind
+  store %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i, %"struct.std::_Rb_tree_node_base.17.99.509.534"** undef, align 8, !tbaa !0
+  unreachable
+}
+
+; CHECK: @test1
+; CHECK: stwu
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stwux.ll b/test/CodeGen/PowerPC/stwux.ll
new file mode 100644
index 0000000..737e9d9
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwux.ll
@@ -0,0 +1,47 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+@multvec_i = external unnamed_addr global [100 x i32], align 4
+
+define fastcc void @subs_STMultiExceptIntern() nounwind {
+entry:
+  br i1 undef, label %while.body.lr.ph, label %return
+
+while.body.lr.ph:                                 ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %if.end12, %while.body.lr.ph
+  %i.0240 = phi i32 [ -1, %while.body.lr.ph ], [ %i.1, %if.end12 ]
+  br i1 undef, label %if.end12, label %if.then
+
+if.then:                                          ; preds = %while.body
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.then, %while.body
+  %i.1 = phi i32 [ %i.0240, %while.body ], [ undef, %if.then ]
+  br i1 undef, label %while.body, label %while.end
+
+while.end:                                        ; preds = %if.end12
+  br i1 undef, label %return, label %if.end15
+
+if.end15:                                         ; preds = %while.end
+  %idxprom.i.i230 = sext i32 %i.1 to i64
+  %arrayidx18 = getelementptr inbounds [100 x i32]* @multvec_i, i64 0, i64 %idxprom.i.i230
+  store i32 0, i32* %arrayidx18, align 4
+  br i1 undef, label %while.body21, label %while.end90
+
+while.body21:                                     ; preds = %if.end15
+  unreachable
+
+while.end90:                                      ; preds = %if.end15
+  store i32 0, i32* %arrayidx18, align 4
+  br label %return
+
+return:                                           ; preds = %while.end90, %while.end, %entry
+  ret void
+
+; CHECK: @subs_STMultiExceptIntern
+; CHECK: stwux
+}
+
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
new file mode 100644
index 0000000..713893b
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -0,0 +1,16 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK:          localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT:     li [[REG2:[0-9]+]], 42
+;CHECK-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
+;CHECK-NEXT:     stw [[REG2]], 0([[REG1]])
+  store i32 42, i32* @a, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 91b2011..3ea46f5 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup}
+; RUN: llc < %s -march=ppc32 | grep "__trampoline_setup"
 
 module asm "\09.lazy_reference .objc_class_name_NSImageRep"
 module asm "\09.objc_class_name_NSBitmapImageRep=0"
diff --git a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
index 015c086..7e58ec0 100644
--- a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
+++ b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec  | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec -disable-ppc-ilp-pref  | FileCheck %s
 ; Formerly this did byte loads and word stores.
 @a = external global <16 x i8>
 @b = external global <16 x i8>
diff --git a/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll b/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll
new file mode 100644
index 0000000..a607f10
--- /dev/null
+++ b/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll
@@ -0,0 +1,13 @@
+; Just check that this doesn't crash:
+; RUN: llc < %s
+; PR2960
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
+target triple = "sparc-unknown-linux-gnu"
+	%"5tango4core9Exception11IOException" = type { [5 x i8*]*, i8*, { i64, i8* }, { i64, i8* }, i64, %"6Object7Monitor"*, %"5tango4core9Exception11IOException"* }
+	%"6Object7Monitor" = type { [3 x i8*]*, i8* }
+
+define fastcc %"5tango4core9Exception11IOException"* @_D5tango4core9Exception13TextException5_ctorMFAaZC5tango4core9Exception13TextException(%"5tango4core9Exception11IOException"* %this, { i64, i8* } %msg) {
+entry_tango.core.Exception.TextException.this:
+	unreachable
+}
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index f06ccd0..38cea4c 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -1,14 +1,11 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s  -march=sparc > %t
-; RUN: grep .foo: %t
-; RUN: grep call.*\.foo %t
-; RUN: grep .baz: %t
-; RUN: grep ld.*\.baz %t
+; RUN: llc < %s  -march=sparc | FileCheck %s
 
 define private void @foo() {
         ret void
 }
+; CHECK: [[FOO:\..*foo]]:
 
 @baz = private global i32 4
 
@@ -17,3 +14,8 @@ define i32 @bar() {
 	%1 = load i32* @baz, align 4
         ret i32 %1
 }
+
+; CHECK: call [[FOO]]
+; CHECK: ld {{.+}}[[BAZ:\..*baz]]
+
+; CHECK: [[BAZ]]
diff --git a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
new file mode 100644
index 0000000..a4c05d2
--- /dev/null
+++ b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=thumbv6-apple-ios -mcpu=cortex-m0 < %s | FileCheck %s
+; Cortex-M0 doesn't have 32-bit Thumb2 instructions (except for dmb, mrs, etc.)
+; rdar://11331541
+
+define i32 @t(i32 %a) nounwind {
+; CHECK: t:
+; CHECK: asrs [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], #31
+; CHECK: eors [[REG1]], [[REG2]]
+  %tmp0 = ashr i32 %a, 31
+  %tmp1 = xor i32 %tmp0, %a
+  ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
index f73f93d..18e11ba 100644
--- a/test/CodeGen/Thumb/asmprinter-bug.ll
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0}
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep "#0"
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
index 0cac755..6cc4dd1 100644
--- a/test/CodeGen/Thumb/frame_thumb.ll
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumb-apple-darwin \
-; RUN:     -disable-fp-elim | not grep {r11}
+; RUN:     -disable-fp-elim | not grep "r11"
 ; RUN: llc < %s -mtriple=thumb-linux-gnueabi \
-; RUN:     -disable-fp-elim | not grep {r11}
+; RUN:     -disable-fp-elim | not grep "r11"
 
 define i32 @f() {
 entry:
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index d03b5b2..2e77660 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -stats |& \
-; RUN:   grep {4 .*Number of machine instrs printed}
+; RUN: llc < %s -march=thumb -stats 2>&1 | \
+; RUN:   grep "4 .*Number of machine instrs printed"
 
 ;; Integer absolute value, should produce something as good as:
 ;; Thumb:
diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
index af7d716..348e9d3 100644
--- a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
+++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -1,4 +1,4 @@
-; RUN: llc -relocation-model=pic < %s | grep {:$} | sort | uniq -d | count 0
+; RUN: llc -relocation-model=pic < %s | grep ":$" | sort | uniq -d | count 0
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
index 19d2385..255b709 100644
--- a/test/CodeGen/Thumb2/constant-islands.ll
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
 ; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
-; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -verify-machineinstrs -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -verify-machineinstrs -o %t.o
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios"
 
diff --git a/test/CodeGen/Thumb2/inflate-regs.ll b/test/CodeGen/Thumb2/inflate-regs.ll
new file mode 100644
index 0000000..d8a558c
--- /dev/null
+++ b/test/CodeGen/Thumb2/inflate-regs.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+; CHECK: local_split
+;
+; The load must go into d0-15 which are all clobbered by the asm.
+; RAGreedy should split the range and use d16-d31 to avoid a spill.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @local_split(float* nocapture %p) nounwind ssp {
+entry:
+  %x = load float* %p, align 4
+  %a = fadd float %x, 1.0
+  tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  store float %a, float* %p, align 4
+  ret void
+}
+
+; CHECK: global_split
+;
+; Same thing, but across basic blocks.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @global_split(float* nocapture %p1, float* nocapture %p2) nounwind ssp {
+entry:
+  %0 = load float* %p1, align 4
+  %add = fadd float %0, 1.000000e+00
+  tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  %cmp = fcmp ogt float %add, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store float %add, float* %p2, align 4
+  br label %if.end
+
+if.end:
+  store float %add, float* %p1, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/inlineasm.ll b/test/CodeGen/Thumb2/inlineasm.ll
new file mode 100644
index 0000000..30f28f8
--- /dev/null
+++ b/test/CodeGen/Thumb2/inlineasm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -filetype=obj
+
+target triple = "thumbv7-none--eabi"
+
+define void @t1() nounwind {
+entry:
+  call void asm sideeffect "mov r0, r1", ""() nounwind
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index aef6f85..61c477a 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -3,17 +3,18 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
 target triple = "thumbv7-apple-ios0.0.0"
 
 ; This test case would clobber the outgoing call arguments by writing to the
-; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+; emergency spill slots at [sp, #4] or [sp, #8] without adjusting the stack
+; pointer first.
 
 ; CHECK: main
 ; CHECK: vmov.f64
 ; Adjust SP for the large call
 ; CHECK: sub sp,
-; CHECK: mov [[FR:r[0-9]+]], sp
-; Store to call frame + #4
-; CHECK: str{{.*\[}}[[FR]], #4]
+; Store to call frame + #8
+; CHECK: vstr{{.*\[}}sp, #8]
 ; Don't clobber that store until the call.
 ; CHECK-NOT: [sp, #4]
+; CHECK-NOT: [sp, #8]
 ; CHECK: variadic
 
 define i32 @main() ssp {
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index df221b9..67b07e6 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests could be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
 
 define i1 @f1(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
@@ -9,7 +9,7 @@ define i1 @f1(i32 %a, i32 %b) {
     ret i1 %tmp
 }
 ; CHECK: f1:
-; CHECK: 	cmn.w	r0, r1
+; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f2(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
@@ -17,7 +17,7 @@ define i1 @f2(i32 %a, i32 %b) {
     ret i1 %tmp
 }
 ; CHECK: f2:
-; CHECK: 	cmn.w	r0, r1
+; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f3(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
@@ -25,7 +25,7 @@ define i1 @f3(i32 %a, i32 %b) {
     ret i1 %tmp
 }
 ; CHECK: f3:
-; CHECK: 	cmn.w	r0, r1
+; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f4(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
@@ -33,7 +33,7 @@ define i1 @f4(i32 %a, i32 %b) {
     ret i1 %tmp
 }
 ; CHECK: f4:
-; CHECK: 	cmn.w	r0, r1
+; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f5(i32 %a, i32 %b) {
     %tmp = shl i32 %b, 5
@@ -42,7 +42,7 @@ define i1 @f5(i32 %a, i32 %b) {
     ret i1 %tmp1
 }
 ; CHECK: f5:
-; CHECK: 	cmn.w	r0, r1, lsl #5
+; CHECK: 	cmn.w	{{.*}}, r1, lsl #5
 
 define i1 @f6(i32 %a, i32 %b) {
     %tmp = lshr i32 %b, 6
@@ -51,7 +51,7 @@ define i1 @f6(i32 %a, i32 %b) {
     ret i1 %tmp1
 }
 ; CHECK: f6:
-; CHECK: 	cmn.w	r0, r1, lsr #6
+; CHECK: 	cmn.w	{{.*}}, r1, lsr #6
 
 define i1 @f7(i32 %a, i32 %b) {
     %tmp = ashr i32 %b, 7
@@ -60,7 +60,7 @@ define i1 @f7(i32 %a, i32 %b) {
     ret i1 %tmp1
 }
 ; CHECK: f7:
-; CHECK: 	cmn.w	r0, r1, asr #7
+; CHECK: 	cmn.w	{{.*}}, r1, asr #7
 
 define i1 @f8(i32 %a, i32 %b) {
     %l8 = shl i32 %a, 24
@@ -71,5 +71,15 @@ define i1 @f8(i32 %a, i32 %b) {
     ret i1 %tmp1
 }
 ; CHECK: f8:
-; CHECK: 	cmn.w	r0, r0, ror #8
+; CHECK: 	cmn.w	{{.*}}, {{.*}}, ror #8
+
 
+define void @f9(i32 %a, i32 %b) nounwind optsize {
+  tail call void asm sideeffect "cmn.w     r0, r1", ""() nounwind, !srcloc !0
+  ret void
+}
+
+!0 = metadata !{i32 81}
+
+; CHECK: f9:
+; CHECK: 	cmn.w	r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index da12114..4ce7acc 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
 ; CHECK: f1:
-; CHECK: cmp r0, #187
+; CHECK: cmp {{.*}}, #187
     %tmp = icmp ne i32 %a, 187
     ret i1 %tmp
 }
@@ -14,7 +14,7 @@ define i1 @f1(i32 %a) {
 ; 0x00aa00aa = 11141290
 define i1 @f2(i32 %a) {
 ; CHECK: f2:
-; CHECK: cmp.w r0, #11141290
+; CHECK: cmp.w {{.*}}, #11141290
     %tmp = icmp eq i32 %a, 11141290 
     ret i1 %tmp
 }
@@ -22,7 +22,7 @@ define i1 @f2(i32 %a) {
 ; 0xcc00cc00 = 3422604288
 define i1 @f3(i32 %a) {
 ; CHECK: f3:
-; CHECK: cmp.w r0, #-872363008
+; CHECK: cmp.w {{.*}}, #-872363008
     %tmp = icmp ne i32 %a, 3422604288
     ret i1 %tmp
 }
@@ -30,7 +30,7 @@ define i1 @f3(i32 %a) {
 ; 0xdddddddd = 3722304989
 define i1 @f4(i32 %a) {
 ; CHECK: f4:
-; CHECK: cmp.w r0, #-572662307
+; CHECK: cmp.w {{.*}}, #-572662307
     %tmp = icmp ne i32 %a, 3722304989
     ret i1 %tmp
 }
@@ -38,7 +38,7 @@ define i1 @f4(i32 %a) {
 ; 0x00110000 = 1114112
 define i1 @f5(i32 %a) {
 ; CHECK: f5:
-; CHECK: cmp.w r0, #1114112
+; CHECK: cmp.w {{.*}}, #1114112
     %tmp = icmp eq i32 %a, 1114112
     ret i1 %tmp
 }
@@ -46,7 +46,7 @@ define i1 @f5(i32 %a) {
 ; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform.
 ;
 ; CHECK: f6:
-; CHECK-NOT: cmp.w r0, #-2147483648
+; CHECK-NOT: cmp.w {{.*}}, #-2147483648
 ; CHECK: bx lr
 define i32 @f6(i32 %a) {
     %tmp = icmp sgt i32 %a, 2147483647
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 15052e0..f6790de 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
-; CHECK: cmp r0, r1
+; CHECK: cmp {{.*}}, r1
     %tmp = icmp ne i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2:
-; CHECK: cmp r0, r1
+; CHECK: cmp {{.*}}, r1
     %tmp = icmp eq i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f6(i32 %a, i32 %b) {
 ; CHECK: f6:
-; CHECK: cmp.w r0, r1, lsl #5
+; CHECK: cmp.w {{.*}}, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = icmp eq i32 %tmp, %a
     ret i1 %tmp1
@@ -27,7 +27,7 @@ define i1 @f6(i32 %a, i32 %b) {
 
 define i1 @f7(i32 %a, i32 %b) {
 ; CHECK: f7:
-; CHECK: cmp.w r0, r1, lsr #6
+; CHECK: cmp.w {{.*}}, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = icmp ne i32 %tmp, %a
     ret i1 %tmp1
@@ -35,7 +35,7 @@ define i1 @f7(i32 %a, i32 %b) {
 
 define i1 @f8(i32 %a, i32 %b) {
 ; CHECK: f8:
-; CHECK: cmp.w r0, r1, asr #7
+; CHECK: cmp.w {{.*}}, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = icmp eq i32 %a, %tmp
     ret i1 %tmp1
@@ -43,7 +43,7 @@ define i1 @f8(i32 %a, i32 %b) {
 
 define i1 @f9(i32 %a, i32 %b) {
 ; CHECK: f9:
-; CHECK: cmp.w r0, r0, ror #8
+; CHECK: cmp.w {{.*}}, {{.*}}, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index 7e1655f..0748b9b3 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,9 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | not grep tbb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | FileCheck %s
 
 ; Do not use tbb / tbh if any destination is before the jumptable.
 ; rdar://7102917
 
 define i16 @main__getopt_internal_2E_exit_2E_ce(i32, i1 %b) nounwind {
+; CHECK: main__getopt_internal_2E_exit_2E_ce
+; CHECK-NOT: tbb
+; CHECK-NOT: tbh
+; 32-bit jump tables use explicit branches, not data regions, so make sure
+; we don't annotate this region.
+; CHECK-NOT: data_region
 entry:
   br i1 %b, label %codeRepl127.exitStub, label %newFuncRoot
 
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index d1af4ba..2178eec 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep {ldr.*\\\[.*\],} | count 1
+; RUN:   grep "ldr.*\[.*\]," | count 1
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index 9cc3f4a..601c0b5 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep {ldr.*\\!} | count 3
+; RUN:   grep "ldr.*\!" | count 3
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep {ldrsb.*\\!} | count 1
+; RUN:   grep "ldrsb.*\!" | count 1
 
 define i32* @test1(i32* %X, i32* %dest) {
         %Y = getelementptr i32* %X, i32 4               ; <i32*> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll
index 39b6ac3..10cd539 100644
--- a/test/CodeGen/Thumb2/thumb2-rev16.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -1,7 +1,7 @@
 ; XFAIL: *
 ; fixme rev16 pattern is not matching
 
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1
 
 ; 0xff00ff00 = 4278255360
 ; 0x00ff00ff = 16711935
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 590c333..5ad92cd 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-
+; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1
 
 ; CHECK: f1:
 ; CHECK: 	ror.w	r0, r0, #22
@@ -13,6 +13,8 @@ define i32 @f1(i32 %a) {
 ; CHECK: f2:
 ; CHECK-NOT: and
 ; CHECK: ror
+; THUMB1: f2
+; THUMB1: and
 define i32 @f2(i32 %v, i32 %nbits) {
 entry:
   %and = and i32 %nbits, 31
@@ -21,4 +23,4 @@ entry:
   %shl = shl i32 %v, %sub
   %or = or i32 %shl, %shr
   ret i32 %or
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
index 5dc3cc3..a9d71d6 100644
--- a/test/CodeGen/Thumb2/thumb2-tbb.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -5,7 +5,9 @@ define void @bar(i32 %n.u) {
 entry:
 ; CHECK: bar:
 ; CHECK: tbb
-; CHECK: .align 1
+; CHECK: .data_region jt8
+; CHECK: .end_data_region
+; CHECK-NEXT: .align 1
 
     switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
 bb:
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 00c928f..d453f46 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
 
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
@@ -10,7 +10,7 @@ define i1 @f2(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f2:
-; CHECK: 	teq.w	r0, #187
+; CHECK: 	teq.w	{{.*}}, #187
 
 ; 0x00aa00aa = 11141290
 define i1 @f3(i32 %a) {
@@ -19,7 +19,7 @@ define i1 @f3(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f3:
-; CHECK: 	teq.w	r0, #11141290
+; CHECK: 	teq.w	{{.*}}, #11141290
 
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
@@ -28,7 +28,7 @@ define i1 @f6(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f6:
-; CHECK: 	teq.w	r0, #-872363008
+; CHECK: 	teq.w	{{.*}}, #-872363008
 
 ; 0xdddddddd = 3722304989
 define i1 @f7(i32 %a) {
@@ -37,7 +37,7 @@ define i1 @f7(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f7:
-; CHECK: 	teq.w	r0, #-572662307
+; CHECK: 	teq.w	{{.*}}, #-572662307
 
 ; 0xdddddddd = 3722304989
 define i1 @f8(i32 %a) {
@@ -53,5 +53,5 @@ define i1 @f10(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f10:
-; CHECK: 	teq.w	r0, #1114112
+; CHECK: 	teq.w	{{.*}}, #1114112
 
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index 8acae90..27ecad8 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
 
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2
-; CHECK: teq.w r0, r1
+; CHECK: teq.w {{.*}}, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
@@ -13,7 +13,7 @@ define i1 @f2(i32 %a, i32 %b) {
 
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4
-; CHECK: teq.w  r0, r1
+; CHECK: teq.w  {{.*}}, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
@@ -21,7 +21,7 @@ define i1 @f4(i32 %a, i32 %b) {
 
 define i1 @f6(i32 %a, i32 %b) {
 ; CHECK: f6
-; CHECK: teq.w  r0, r1, lsl #5
+; CHECK: teq.w  {{.*}}, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -30,7 +30,7 @@ define i1 @f6(i32 %a, i32 %b) {
 
 define i1 @f7(i32 %a, i32 %b) {
 ; CHECK: f7
-; CHECK: teq.w  r0, r1, lsr #6
+; CHECK: teq.w  {{.*}}, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -39,7 +39,7 @@ define i1 @f7(i32 %a, i32 %b) {
 
 define i1 @f8(i32 %a, i32 %b) {
 ; CHECK: f8
-; CHECK: teq.w  r0, r1, asr #7
+; CHECK: teq.w  {{.*}}, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -48,7 +48,7 @@ define i1 @f8(i32 %a, i32 %b) {
 
 define i1 @f9(i32 %a, i32 %b) {
 ; CHECK: f9
-; CHECK: teq.w  r0, r0, ror #8
+; CHECK: teq.w  {{.*}}, {{.*}}, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 43e208c..67fe82e 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
 
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
@@ -10,7 +10,7 @@ define i1 @f2(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f2:
-; CHECK: 	tst.w	r0, #187
+; CHECK: 	tst.w	{{.*}}, #187
 
 ; 0x00aa00aa = 11141290
 define i1 @f3(i32 %a) {
@@ -19,7 +19,7 @@ define i1 @f3(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f3:
-; CHECK: 	tst.w	r0, #11141290
+; CHECK: 	tst.w	{{.*}}, #11141290
 
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
@@ -28,7 +28,7 @@ define i1 @f6(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f6:
-; CHECK: 	tst.w	r0, #-872363008
+; CHECK: 	tst.w	{{.*}}, #-872363008
 
 ; 0xdddddddd = 3722304989
 define i1 @f7(i32 %a) {
@@ -37,7 +37,7 @@ define i1 @f7(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f7:
-; CHECK: 	tst.w	r0, #-572662307
+; CHECK: 	tst.w	{{.*}}, #-572662307
 
 ; 0x00110000 = 1114112
 define i1 @f10(i32 %a) {
@@ -46,4 +46,4 @@ define i1 @f10(i32 %a) {
     ret i1 %tmp1
 }
 ; CHECK: f10:
-; CHECK: 	tst.w	r0, #1114112
+; CHECK: 	tst.w	{{.*}}, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index bfe016f..e3fe792 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
 
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2:
-; CHECK: tst r0, r1
+; CHECK: tst {{.*}}, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
@@ -13,7 +13,7 @@ define i1 @f2(i32 %a, i32 %b) {
 
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4:
-; CHECK: tst r0, r1
+; CHECK: tst {{.*}}, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
@@ -21,7 +21,7 @@ define i1 @f4(i32 %a, i32 %b) {
 
 define i1 @f6(i32 %a, i32 %b) {
 ; CHECK: f6:
-; CHECK: tst.w r0, r1, lsl #5
+; CHECK: tst.w {{.*}}, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -30,7 +30,7 @@ define i1 @f6(i32 %a, i32 %b) {
 
 define i1 @f7(i32 %a, i32 %b) {
 ; CHECK: f7:
-; CHECK: tst.w r0, r1, lsr #6
+; CHECK: tst.w {{.*}}, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -39,7 +39,7 @@ define i1 @f7(i32 %a, i32 %b) {
 
 define i1 @f8(i32 %a, i32 %b) {
 ; CHECK: f8:
-; CHECK: tst.w r0, r1, asr #7
+; CHECK: tst.w {{.*}}, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -48,7 +48,7 @@ define i1 @f8(i32 %a, i32 %b) {
 
 define i1 @f9(i32 %a, i32 %b) {
 ; CHECK: f9:
-; CHECK: tst.w r0, r0, ror #8
+; CHECK: tst.w {{.*}}, {{.*}}, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 03189aa..61e849e 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,15 +1,22 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s --check-prefix=M3
+; rdar://11318438
 
 define zeroext i8 @test1(i32 %A.u)  {
-; CHECK: test1
-; CHECK: uxtb r0, r0
+; A8: test1
+; A8: uxtb r0, r0
     %B.u = trunc i32 %A.u to i8
     ret i8 %B.u
 }
 
 define zeroext i32 @test2(i32 %A.u, i32 %B.u)  {
-; CHECK: test2
-; CHECK: uxtab  r0, r0, r1
+; A8: test2
+; A8: uxtab  r0, r0, r1
+
+; M3: test2
+; M3: uxtb  r1, r1
+; M3-NOT: uxtab
+; M3: add   r0, r1
     %C.u = trunc i32 %B.u to i8
     %D.u = zext i8 %C.u to i32
     %E.u = add i32 %A.u, %D.u
@@ -17,8 +24,8 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u)  {
 }
 
 define zeroext i32 @test3(i32 %A.u)  {
-; CHECK: test3
-; CHECK: uxth.w r0, r0, ror #8
+; A8: test3
+; A8: uxth.w r0, r0, ror #8
     %B.u = lshr i32 %A.u, 8
     %C.u = shl i32 %A.u, 24
     %D.u = or i32 %B.u, %C.u
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 1e55557..d91e3b3 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
-; RUN:     grep {i(tpoff)}
+; RUN:     grep "i(tpoff)"
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
-; RUN:     grep {__aeabi_read_tp}
+; RUN:     grep "__aeabi_read_tp"
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
-; RUN:     -relocation-model=pic | grep {__tls_get_addr}
+; RUN:     -relocation-model=pic | grep "__tls_get_addr"
 
 
 @i = thread_local global i32 15		; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2484860..0af2445 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -3,7 +3,7 @@
 ; it makes a ton of annoying overlapping live ranges.  This code should not
 ; cause spills!
 ;
-; RUN: llc < %s -march=x86 -stats |& not grep spilled
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
index 8b0a185..f201b98 100644
--- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -1,4 +1,4 @@
 ; RUN: llc < %s -march=x86 | \
-; RUN:   not grep {.byte\[\[:space:\]\]*true}
+; RUN:   not grep ".byte[[:space:]]*true"
 
 @X = global i1 true             ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
index fea2b54..dde210b 100644
--- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | grep {(%esp}
-; RUN: llc < %s -march=x86 | grep {pushl	%ebp} | count 1
-; RUN: llc < %s -march=x86 | grep {popl	%ebp} | count 1
+; RUN: llc < %s -march=x86 | grep "(%esp"
+; RUN: llc < %s -march=x86 | grep "pushl	%ebp" | count 1
+; RUN: llc < %s -march=x86 | grep "popl	%ebp" | count 1
 
 declare i8* @llvm.returnaddress(i32)
 
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
index c44d10a..e22aa6a 100644
--- a/test/CodeGen/X86/2004-03-30-Select-Max.ll
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {j\[lgbe\]}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; CHECK-NOT: {{j[lgbe]}}
 
 define i32 @max(i32 %A, i32 %B) nounwind {
         %gt = icmp sgt i32 %A, %B               ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index dc69ef8..f8bf099 100644
--- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 | not grep "subl.*%esp"
 
 define i32 @f(i32 %a, i32 %b) {
         %tmp.2 = mul i32 %a, %a         ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index 0421896..1a3d749 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86  -stats |& \
+; RUN: llc < %s -march=x86  -stats 2>&1 | \
 ; RUN:   grep asm-printer | grep 7
 
 define i32 @g(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 8783a11..fb1262a 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
-; RUN: grep {movl	_last} %t | count 1
-; RUN: grep {cmpl.*_last} %t | count 1
+; RUN: grep "movl	_last" %t | count 1
+; RUN: grep "cmpl.*_last" %t | count 1
 
 @block = external global i8*            ; <i8**> [#uses=1]
 @last = external global i32             ; <i32*> [#uses=3]
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index b045329..5cba3ef 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
-; RUN:   not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \
+; RUN:   not grep "Number of register spills"
 ; END.
 
 
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 7d0a6ab..1c75f93 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
+; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
 ; RUN:   grep asm-printer | grep 14
 ;
 @size20 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 23954d7..95eefa1 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats  |& \
+; RUN: llc < %s -march=x86 -stats  2>&1 | \
 ; RUN:   grep asm-printer | grep 13
 
 define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index d58d638..3419d01 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep "subl.*%esp"
 
 @A = external global i16*		; <i16**> [#uses=1]
 @B = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 38bca28..37c5107 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 35
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \
+; RUN:     grep "asm-printer" | grep 35
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index 3159cec..c5c74d1 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,7 +1,7 @@
 ; PR850
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
-; RUN: grep {movl 4(%eax),%ebp} %t
-; RUN: grep {movl 0(%eax), %ebx} %t
+; RUN: grep "movl 4(%eax),%ebp" %t
+; RUN: grep "movl 0(%eax), %ebx" %t
 
 define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) {
 	%tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint  $$0x80\0Apop  %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i )		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
index a19d8f7..56d5f2f 100644
--- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN:    not grep {movl %eax, %edx}
+; RUN:    not grep "movl %eax, %edx"
 
 define i32 @foo(i32 %t, i32 %C) {
 entry:
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 6ec9a48..a58c9b1 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -52,8 +52,8 @@ entry:
         %tmp21 = load double* %tmp20            ; <double> [#uses=1]
         %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
         %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 )           ; <i32> [#uses=0]
-        br label %return
-return:         ; preds = %entry
+        br label %finish
+finish:
         %retval.upgrd.8 = load i32* %retval             ; <i32> [#uses=1]
         ret i32 %retval.upgrd.8
 }
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index affb7af..783d9f9 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep movb %t | count 2
-; RUN: grep {movzb\[wl\]} %t
+; RUN: grep movb %t | count 1
+; RUN: grep "movzb[wl]" %t
 
 
 define void @handle_vector_size_attribute() nounwind {
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index a228898..04d4b8e 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep {,%rsp)} %t
+; RUN: not grep ",%rsp)" %t
 ; PR1103
 
 target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
index 3312e01..3b2e443 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
+; RUN: llc < %s -march=x86 | grep "mov %gs:72, %eax"
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index c1b1ad1..18b06dc 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mcpu=yonah -march=x86 | \
-; RUN:   grep {cmpltsd %xmm0, %xmm0}
+; RUN:   grep "cmpltsd %xmm0, %xmm0"
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
index 85a2ecc..cae68c9 100644
--- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep {bsrl.*10}
+; RUN: llc < %s | not grep "bsrl.*10"
 ; PR1356
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index deb3999..c3d7e8a 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep "addl .12, %esp"
 ; PR1398
 
 	%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index 77291f0..aa0ee5d 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {movsbl}
+; RUN: llc < %s -march=x86 | grep "movsbl"
 
 @X = global i32 0               ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index 5acb051..e81534b 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | FileCheck %s
+; CHECK-NOT: lea R
 
 	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
 	%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index 228a915..56a109a 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static | grep {foo str$}
+; RUN: llc < %s -relocation-model=static | grep "foo str$"
 ; PR1761
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-pc-linux"
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 2e95082..99df20d 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep "(%esp)" | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 266fd7b..39af931 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -10,10 +10,10 @@
 
 	%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
 
-define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind  {
+define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind  {
 entry:
 	%tmp910 = add i32 0, %offset		; <i32> [#uses=1]
-	br i1 true, label %bb951, label %bb917
+	br i1 %cond, label %bb951, label %bb917
 
 bb917:		; preds = %entry
 	ret i32 0
@@ -21,7 +21,7 @@ bb917:		; preds = %entry
 bb951:		; preds = %bb986, %entry
 	%tmp955 = sdiv i32 0, 2		; <i32> [#uses=3]
 	%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0		; <i32*> [#uses=1]
-	br i1 true, label %bb986, label %bb967
+	br i1 %cond, label %bb986, label %bb967
 
 bb967:		; preds = %bb951
 	ret i32 0
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 0091397..9584b71 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast -optimize-regalloc=0
 
 define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
 entry:
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index bdacf50..a1b973d 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
 ; PR1909
 
 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00"		; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 5115e48..a52b365 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | grep {a:} | not grep ax
-; RUN: llc < %s | grep {b:} | not grep ax
+; RUN: llc < %s | grep "a:" | not grep ax
+; RUN: llc < %s | grep "b:" | not grep ax
 ; PR2078
 ; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
 ; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index da02907..9185a36 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -march=x86 -mattr=+mmx | grep esi
 ; PR2082
 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
 ; registers.
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 4dc3a10..5ca7e3e 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep "#" | not grep -v "##"
 
 	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
 	%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 109069e..3a1de11 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep ", %e"
 
 	%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
 	%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index 859041e..f244793 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep "%e"
 ; Make sure xorl operands are 32-bit registers.
 
 	%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index 5b97eb7..7c04206 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
-; RUN: not grep {r\[abcd\]x} %t
-; RUN: not grep {r\[ds\]i} %t
-; RUN: not grep {r\[bs\]p} %t
+; RUN: not grep "r[abcd]x" %t
+; RUN: not grep "r[ds]i" %t
+; RUN: not grep "r[bs]p" %t
 
 	%struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* }
 	%struct.BltData = type { float, float, float, float }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index c068f8a..4e73b5a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast -optimize-regalloc=0
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
index 99cb856..bdac8fd 100644
--- a/test/CodeGen/X86/2008-08-06-CmpStride.ll
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s -o - | grep {cmpl	\\$\[1\], %}
+; RUN: llc -march=x86-64 < %s -o - | grep "cmpl	\$[1], %"
 
 @.str = internal constant [4 x i8] c"%d\0A\00"
 
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index 1d27fc5..c63c890 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,15 +1,36 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %ebp | count 9
-; RUN: llc < %s | grep %ecx | count 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i386-pc-linux"
 
-define i8* @test(i32 %a, i8* %b)  {
+; CHECK: test1
+; CHECK: pushl %ebp
+define i8* @test1(i32 %a, i8* %b)  {
 entry:
   call void @llvm.eh.unwind.init()
   %foo   = alloca i32
   call void @llvm.eh.return.i32(i32 %a, i8* %b)
+; CHECK: movl 12(%ebp), %[[ECX:e..]]
+; CHECK: movl 8(%ebp), %[[EAX:e..]]
+; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]])
+; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]]
+; CHECK: movl %[[ECX2]], %esp
+; CHECK: ret
+  unreachable
+}
+
+; CHECK: test2
+; CHECK: pushl %ebp
+define i8* @test2(i32 %a, i8* %b)  {
+entry:
+  call void @llvm.eh.return.i32(i32 %a, i8* %b)
+; CHECK: movl 12(%ebp), %[[ECX:e..]]
+; CHECK: movl 8(%ebp), %[[EAX:e..]]
+; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]])
+; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]]
+; CHECK: movl %[[ECX2]], %esp
+; CHECK: ret
   unreachable
 }
 
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 86e50c9..4b2774b 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s
-; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
 
 ; %0 must not be put in EAX or EDX.
 ; In the first asm, $0 and $2 must not be put in EAX.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 6867ae7..5c2fbee 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -regalloc=fast       | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=basic      | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=greedy     | FileCheck %s
 
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
index 421b931..e504bc3 100644
--- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep "ucomiss[^,]*esp"
 
 define void @f(float %wt) {
 entry:
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 9d144a4..b2cf34cd 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s
 ; Now this test spills one register. But a reload in the loop is cheaper than
 ; the divsd so it's a win.
 
@@ -17,8 +17,7 @@ bb:		; preds = %bb, %entry
 ; CHECK: %bb30.loopexit
 ; CHECK: divsd %xmm0
 ; CHECK: movsd %xmm0, 16(%esp)
-; CHECK: .align
-; CHECK-NEXT: %bb3
+; CHECK: %bb3
 bb3:		; preds = %bb30.loopexit, %bb25, %bb3
 	%2 = load i32* null, align 4		; <i32> [#uses=1]
 	%3 = mul i32 %2, 0		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index 2edcaea..0e95c9e 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+; RUN: llc < %s -march=x86 -relocation-model=static | grep "lea.*X.*esp" | count 2
 
 @X = external global [0 x i32]
 
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 3e42553..b478f27 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {mov.*56}
+; RUN: llc < %s -march=x86 | grep "mov.*56"
 ; PR3449
 
 define void @test(<8 x double>* %P, i64* %Q) nounwind {
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 7ea6998..9cbf350 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
 ; rdar://6608609
 
 define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 0b5b7bd..d50fe6f 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
 
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index 3564f01..847a43f 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep ".space"
 ; rdar://6668548
 
 declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 8bbdb0e..d934ec9 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
 ; RUN: not grep spill %t
-; RUN: not grep {%rsp} %t
-; RUN: not grep {%rbp} %t
+; RUN: not grep "%rsp" %t
+; RUN: not grep "%rbp" %t
 
 ; The register-pressure scheduler should be able to schedule this in a
 ; way that does not require spills.
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index f46eed4..ad18a0c 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
 ; XFAIL: *
 ; 69408 removed the opportunity for this optimization to work
 
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 9f5a8c5..5cb05e8 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
+; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032
 
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index d6ed0c4..08bf9e3 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
-; RUN: grep {leaq.*TLSGD} %t2
-; RUN: grep {__tls_get_addr} %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic > %t2
+; RUN: grep "leaq.*TLSGD" %t2
+; RUN: grep "__tls_get_addr" %t2
 ; PR4004
 
 @i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index a2fd2e4..a6ed74b 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
+; RUN: llc < %s | grep "movl.*%ebx, 8(%esi)"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.0"
 
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index af552d4..fe04272 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | not grep {movzbl	%\[abcd\]h,}
+; RUN: llc < %s -march=x86-64 | not grep "movzbl	%[abcd]h,"
 
 define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
 newFuncRoot:
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index de930d5..b8effa6 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
-; RUN: grep {movswl	%ax, %edi} %t
-; RUN: grep {movw	(%rax), %ax} %t
+; RUN: grep "movswl	%ax, %edi" %t
+; RUN: grep "movw	(%rax), %ax" %t
 ; XFAIL: *
 
 @x = common global i16 0
diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
index cd8960b..eb4a5c0 100644
--- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll
+++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats |& not grep ext-opt
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt
 
 define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
index 90eb84d..35f2339 100644
--- a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
+++ b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast %s -o %t
+; RUN: llc -regalloc=fast -optimize-regalloc=0 %s -o %t
 ; PR7066
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/2010-05-12-FastAllocKills.ll b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
index 36a99d6..eb0b150 100644
--- a/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
+++ b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast -verify-machineinstrs < %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin"
 
diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 4639866..9b47bb7 100644
--- a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast < %s | FileCheck %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 < %s | FileCheck %s
 ; PR7382
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
index c6f4b49..be10ad5 100644
--- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
+++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -12,9 +12,9 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData
 
 ; Avoid hoisting the test above loads or copies
 ; CHECK: %entry
-; CHECK: cmpq
+; CHECK: test
 ; CHECK-NOT: mov
-; CHECK: jb
+; CHECK: je
 define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
 entry:
   %0 = load i8** null, align 8
diff --git a/test/CodeGen/X86/2011-04-19-sclr-bb.ll b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
new file mode 100644
index 0000000..771e4b3
--- /dev/null
+++ b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that values of illegal types are not scalarized between basic blocks.
+;CHECK: test
+;CHECK-NOT: pinsrw
+;CHECK-NOT: pextrb
+;CHECK: ret
+define void @test(i1 %cond) {
+ENTRY:
+  br label %LOOP
+LOOP:
+  %vec1 = phi <4 x i1> [ %vec1_or_2, %LOOP ], [ zeroinitializer, %ENTRY ]
+  %vec2 = phi <4 x i1> [ %vec2_and_1, %LOOP ], [ zeroinitializer, %ENTRY ]
+  %vec1_or_2 = or <4 x i1> %vec1, %vec2
+  %vec2_and_1 = and <4 x i1> %vec2, %vec1
+  br i1 %cond, label %LOOP, label %EXIT
+
+EXIT:
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
index bf7f583..ce63c74 100644
--- a/test/CodeGen/X86/2011-06-03-x87chain.ll
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse | FileCheck %s
 
 define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
 entry:
diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
index a51dad0..47ef693 100644
--- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
+++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats |& FileCheck %s
+; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s
 ;
 ; This test should not cause any spilling with RAFast.
 ;
diff --git a/test/CodeGen/X86/2011-09-18-sse2cmp.ll b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
index 844d674..a6f428f 100644
--- a/test/CodeGen/X86/2011-09-18-sse2cmp.ll
+++ b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
+;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
 
 ;CHECK: @max
 ;CHECK: cmplepd
diff --git a/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
index ed5649c..4daf678 100644
--- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
 
 ; Make sure we are not crashing on this code.
 
diff --git a/test/CodeGen/X86/2011-10-11-srl.ll b/test/CodeGen/X86/2011-10-11-srl.ll
index cf9d36f..6c6d340 100644
--- a/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/test/CodeGen/X86/2011-10-11-srl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=-sse41 
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41 
 
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index 6f9188c..dc3a08b 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
-; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
 ; Test case for r146671
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
index 557d49d..477b4de 100644
--- a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=core2 -mattr=+sse | FileCheck %s
 ; PR11940: Do not optimize away movb %al, %ch
 
 %struct.APInt = type { i64* }
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
index 101ecca..18a3313 100644
--- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
-; RUN:   not grep {Number of machine instructions hoisted out of loops post regalloc}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \
+; RUN:   not grep "Number of machine instructions hoisted out of loops post regalloc"
 
 ; rdar://11095580
 
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 9543587..9a66b67 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -5,7 +5,8 @@
 ; It's hard to test for the ISEL condition because CodeGen optimizes
 ; away the bugpointed code. Just ensure the basics are still there.
 ;CHECK: func:
-;CHECK: vmovups
+;CHECK: vpxor
+;CHECK: vinsertf128
 ;CHECK: vpshufd
 ;CHECK: vpshufd
 ;CHECK: vmulps
diff --git a/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
new file mode 100644
index 0000000..171c3f1
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=source | FileCheck %s
+
+; Teach two-address pass to update the "source" map so it doesn't perform a
+; non-profitable commute using outdated info. The test case would still fail
+; because of poor pre-RA schedule. That will be fixed by MI scheduler.
+; rdar://11472010
+define i32 @t(i32 %mask) nounwind readnone ssp {
+entry:
+; CHECK: t:
+; CHECK-NOT: mov
+  %sub = add i32 %mask, -65535
+  %shr = lshr i32 %sub, 23
+  %and = and i32 %mask, 1
+  %add = add i32 %shr, %and
+  ret i32 %add
+}
diff --git a/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll
new file mode 100644
index 0000000..837fbc0
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -verify-coalescing
+; PR12892
+;
+; Dead code elimination during coalesing causes a live range to split into two
+; virtual registers. Stale identity copies that had already been joined were
+; interfering with the liveness computations.
+
+target triple = "i386-pc-linux-gnu"
+
+define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 {
+  invoke void @_ZNK4llvm13CodeGenTarget12getAsmParserEv()
+          to label %1 unwind label %5
+
+; <label>:1                                       ; preds = %0
+  invoke void @_ZNK4llvm6Record16getValueAsStringENS_9StringRefE()
+          to label %4 unwind label %2
+
+; <label>:2                                       ; preds = %1
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  unreachable
+
+; <label>:4                                       ; preds = %1
+  invoke void @_ZN4llvm18isCurrentDebugTypeEPKc()
+          to label %12 unwind label %7
+
+; <label>:5                                       ; preds = %0
+  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %33
+
+; <label>:7                                       ; preds = %4
+  %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %9
+
+; <label>:9                                       ; preds = %28, %7
+  %10 = phi { i8*, i32 } [ %29, %28 ], [ %8, %7 ]
+  %11 = extractvalue { i8*, i32 } %10, 1
+  invoke fastcc void @_ZN12_GLOBAL__N_114AsmMatcherInfoD2Ev()
+          to label %32 unwind label %35
+
+; <label>:12                                      ; preds = %4
+  invoke void @_ZNK4llvm13CodeGenTarget10getRegBankEv()
+          to label %13 unwind label %16
+
+; <label>:13                                      ; preds = %12
+  br label %14
+
+; <label>:14                                      ; preds = %20, %13
+  %15 = icmp eq i32 undef, 0
+  br i1 %15, label %20, label %18
+
+; <label>:16                                      ; preds = %12
+  %17 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %26
+
+; <label>:18                                      ; preds = %14
+  invoke void @_ZNSs4_Rep9_S_createEjjRKSaIcE()
+          to label %19 unwind label %21
+
+; <label>:19                                      ; preds = %18
+  unreachable
+
+; <label>:20                                      ; preds = %14
+  br label %14
+
+; <label>:21                                      ; preds = %18
+  %22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %23 = extractvalue { i8*, i32 } %22, 1
+  br i1 undef, label %26, label %24
+
+; <label>:24                                      ; preds = %21
+  br i1 undef, label %25, label %26
+
+; <label>:25                                      ; preds = %24
+  unreachable
+
+; <label>:26                                      ; preds = %24, %21, %16
+  %27 = phi i32 [ 0, %16 ], [ %23, %21 ], [ %23, %24 ]
+  invoke void @_ZNSt6vectorISt4pairISsSsESaIS1_EED1Ev()
+          to label %28 unwind label %30
+
+; <label>:28                                      ; preds = %26
+  %29 = insertvalue { i8*, i32 } undef, i32 %27, 1
+  br label %9
+
+; <label>:30                                      ; preds = %26
+  %31 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+
+; <label>:32                                      ; preds = %9
+  br label %33
+
+; <label>:33                                      ; preds = %32, %5
+  %34 = phi i32 [ undef, %5 ], [ %11, %32 ]
+  unreachable
+
+; <label>:35                                      ; preds = %9
+  %36 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+declare void @_ZNK4llvm13CodeGenTarget12getAsmParserEv()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZNK4llvm6Record16getValueAsStringENS_9StringRefE()
+
+declare void @_ZN4llvm18isCurrentDebugTypeEPKc()
+
+declare fastcc void @_ZN12_GLOBAL__N_114AsmMatcherInfoD2Ev() unnamed_addr inlinehint align 2
+
+declare hidden void @_ZNSt6vectorISt4pairISsSsESaIS1_EED1Ev() unnamed_addr align 2
+
+declare void @_ZNSs4_Rep9_S_createEjjRKSaIcE()
+
+declare void @_ZNK4llvm13CodeGenTarget10getRegBankEv()
diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll
new file mode 100644
index 0000000..1c1e8e2
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
+
+define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
+entry:
+  ; CHECK: vmovaps
+  ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
+  ; CHECK: vmovups
+  %A = load <4 x i32>* %Ap
+  %B = load <4 x i32>* %Bp
+  %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i32> %Z, <8 x i32>* %P, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
new file mode 100644
index 0000000..906b748
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: load_store
+define void @load_store(<4 x i16>* %in) {
+entry:
+; CHECK: movsd
+  %A27 = load <4 x i16>* %in, align 4
+  %A28 = add <4 x i16> %A27, %A27
+; CHECK: movlpd
+  store <4 x i16> %A28, <4 x i16>* %in, align 4
+  ret void
+; CHECK: ret
+}
+
+; Make sure that we store a 64bit value, even on 32bit systems.
+;CHECK: store_64
+define void @store_64(<2 x i32>* %ptr) {
+BB:
+  store <2 x i32> zeroinitializer, <2 x i32>* %ptr
+  ret void
+;CHECK: movlpd
+;CHECK: ret
+}
+
+;CHECK: load_64
+define <2 x i32> @load_64(<2 x i32>* %ptr) {
+BB:
+  %t = load <2 x i32>* %ptr
+  ret <2 x i32> %t
+;CHECK: movsd
+;CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-07-10-shufnorm.ll b/test/CodeGen/X86/2012-07-10-shufnorm.ll
new file mode 100644
index 0000000..e39df58
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-10-shufnorm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx | FileCheck %s
+
+; CHECK: ocl
+define void @ocl() {
+entry:
+  %vext = shufflevector <2 x double> zeroinitializer, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit = shufflevector <8 x double> %vext, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit1 = insertelement <8 x double> %vecinit, double undef, i32 2
+  %vecinit3 = insertelement <8 x double> %vecinit1, double undef, i32 3
+  %vecinit5 = insertelement <8 x double> %vecinit3, double 0.000000e+00, i32 4
+  %vecinit9 = shufflevector <8 x double> %vecinit5, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 9, i32 10>
+  store <8 x double> %vecinit9, <8 x double>* undef
+  ret void
+; CHECK: vxorps
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
new file mode 100644
index 0000000..3b7a8a7
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
+
+declare x86_fastcallcc i64 @barrier()
+
+;CHECK: bcast_fold
+;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
+;CHECK: barrier
+;CHECK: vbroadcastss [[SPILLED]], %ymm0
+;CHECK: ret
+define <8 x float> @bcast_fold( float* %A) {
+BB:
+  %A0 = load float* %A
+  %tt3 = call x86_fastcallcc i64 @barrier()
+  br i1 undef, label %work, label %exit
+
+work:
+  %A1 = insertelement <8 x float> undef, float %A0, i32 0
+  %A2 = shufflevector <8 x float> %A1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %A2
+
+exit:
+  ret <8 x float> undef
+}
diff --git a/test/CodeGen/X86/2012-07-15-tconst_shl.ll b/test/CodeGen/X86/2012-07-15-tconst_shl.ll
new file mode 100644
index 0000000..46eca76
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-tconst_shl.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+avx2
+; make sure that we are not crashing.
+
+define <16 x i32> @autogen_SD34717() {
+BB:
+  %Shuff7 = shufflevector <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 undef, i32 22, i32 24, i32 26, i32 28, i32 30, i32 undef>
+  %B9 = lshr <16 x i32> zeroinitializer, %Shuff7
+  ret <16 x i32> %B9
+}
diff --git a/test/CodeGen/X86/2012-07-15-vshl.ll b/test/CodeGen/X86/2012-07-15-vshl.ll
new file mode 100644
index 0000000..cd0fef4
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-vshl.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx
+; PR13352
+
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define void @f_f() nounwind {
+allocas:
+  br label %for_loop29
+
+for_loop29:                                       ; preds = %safe_if_after_true, %allocas
+  %indvars.iv596 = phi i64 [ %indvars.iv.next597, %safe_if_after_true ], [ 0, %allocas ]
+  %0 = trunc i64 %indvars.iv596 to i32
+  %smear.15 = insertelement <16 x i32> undef, i32 %0, i32 15
+  %bitop = lshr <16 x i32> zeroinitializer, %smear.15
+  %bitop35 = and <16 x i32> %bitop, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %bitop35_to_bool = icmp ne <16 x i32> %bitop35, zeroinitializer
+  %val_to_boolvec32 = sext <16 x i1> %bitop35_to_bool to <16 x i32>
+  %floatmask.i526 = bitcast <16 x i32> %val_to_boolvec32 to <16 x float>
+  %mask1.i529 = shufflevector <16 x float> %floatmask.i526, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %"internal_mask&function_mask41_any" = icmp eq i32 undef, 0
+  br i1 %"internal_mask&function_mask41_any", label %safe_if_after_true, label %safe_if_run_true
+
+safe_if_after_true:                               ; preds = %for_loop29
+  %indvars.iv.next597 = add i64 %indvars.iv596, 1
+  br label %for_loop29
+
+safe_if_run_true:                                 ; preds = %for_loop29
+  %blend1.i583 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> undef, <8 x float> undef, <8 x float> %mask1.i529) nounwind
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-07-16-LeaUndef.ll b/test/CodeGen/X86/2012-07-16-LeaUndef.ll
new file mode 100644
index 0000000..9e5cbd2
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-16-LeaUndef.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD2543() {
+A:
+  %E83 = add i32 0, 1
+  %E820 = add i32 0, undef
+  br label %C
+C:
+  %B908 = add i32 %E83, %E820
+  store i32 %B908, i32* undef
+  %Sl2391 = select i1 undef, i32 undef, i32 %E83
+  %Cmp3114 = icmp ne i32 %Sl2391, undef
+  br i1 %Cmp3114, label %C, label %G
+G:
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll
new file mode 100644
index 0000000..17533a1
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD3100() {
+BB:
+  %FC123 = fptoui float 0x40693F5D00000000 to i1
+  br i1 %FC123, label %V, label %W
+
+V:
+  ret void
+W:
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-07-17-vtrunc.ll b/test/CodeGen/X86/2012-07-17-vtrunc.ll
new file mode 100644
index 0000000..2de2f97
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-17-vtrunc.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD33189483() {
+BB:
+  br label %CF76
+
+CF76:                                             ; preds = %CF76, %BB
+  %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> undef, <4 x i32> zeroinitializer
+  %Tr16 = trunc <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i1>
+  %E19 = extractelement <8 x i1> %Tr16, i32 2
+  br i1 %E19, label %CF76, label %CF78
+
+CF78:                                             ; preds = %CF78, %CF76
+  %BC = bitcast <4 x i64> %Shuff13 to <4 x double>
+  br label %CF78
+}
diff --git a/test/CodeGen/X86/2012-07-23-select_cc.ll b/test/CodeGen/X86/2012-07-23-select_cc.ll
new file mode 100644
index 0000000..33fcb12
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-23-select_cc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; PR 13428
+
+declare void @use(double)
+
+define void @test() {
+entry:
+  call void @use(double 1.000000e+00)
+  %A = icmp eq i64 undef, 2
+  %B = zext i1 %A to i32
+  %C = sitofp i32 %B to double
+  call void @use(double %C)
+  call void @use(double 0.000000e+00)
+  unreachable
+}
diff --git a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
new file mode 100644
index 0000000..000b853
--- /dev/null
+++ b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+
+; Cmp lowering should not look past the truncate unless the high bits are known
+; zero.
+; rdar://12027825
+
+define void @foo(i8 %arg4, i32 %arg5, i32* %arg14) nounwind {
+bb:
+; CHECK: foo:
+; CHECK-NOT: testl
+; CHECK: testb
+  %tmp48 = zext i8 %arg4 to i32
+  %tmp49 = and i32 %tmp48, 32
+  %tmp50 = add i32 %tmp49, 1593371643
+  %tmp55 = sub i32 %tmp50, 0
+  %tmp56 = add i32 %tmp55, 7787538
+  %tmp57 = xor i32 %tmp56, 1601159181
+  %tmp58 = xor i32 %arg5, 1601159181
+  %tmp59 = and i32 %tmp57, %tmp58
+  %tmp60 = add i32 %tmp59, -1263900958
+  %tmp67 = sub i32 %tmp60, 0
+  %tmp103 = xor i32 %tmp56, 13
+  %tmp104 = trunc i32 %tmp103 to i8
+  %tmp105 = sub i8 0, %tmp104
+  %tmp106 = add i8 %tmp105, -103
+  %tmp113 = sub i8 %tmp106, 0
+  %tmp114 = add i8 %tmp113, -72
+  %tmp141 = icmp ne i32 %tmp67, -1263900958
+  %tmp142 = select i1 %tmp141, i8 %tmp114, i8 undef
+  %tmp143 = xor i8 %tmp142, 81
+  %tmp144 = zext i8 %tmp143 to i32
+  %tmp145 = add i32 %tmp144, 2062143348
+  %tmp152 = sub i32 %tmp145, 0
+  store i32 %tmp152, i32* %arg14
+  ret void
+}
diff --git a/test/CodeGen/X86/4char-promote.ll b/test/CodeGen/X86/4char-promote.ll
index 386057f..4f1a859 100644
--- a/test/CodeGen/X86/4char-promote.ll
+++ b/test/CodeGen/X86/4char-promote.ll
@@ -1,11 +1,12 @@
 ; A test for checking PR 9623
-;RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
 
 target triple = "x86_64-apple-darwin"
 
-; CHECK:  pmulld 
-; CHECK:  paddd  
-; CHECK:  movdqa 
+; CHECK:  pmulld
+; CHECK:  paddd
+; CHECK-NOT:  movdqa
+; CHECK:  ret
 
 define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y) {
 entry:
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
index 3758fd8..3314168 100644
--- a/test/CodeGen/X86/MachineSink-PHIUse.ll
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats |& grep {machine-sink}
+; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink"
 
 define fastcc void @t() nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 8e871f4..03d2e47 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,8 +1,6 @@
 ; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
-
-; Some of these tests depend on -join-physregs to commute instructions.
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
 
 ; The immediate can be encoded in a smaller way if the
 ; instruction is a sub instead of an add.
@@ -101,9 +99,9 @@ define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
 }
 
 ; X64: test7:
-; X64: addl %e[[A1]], %eax
+; X64: addl %e[[A1]], %e
 ; X64-NEXT: setb %dl
-; X64-NEXT: ret
+; X64: ret
 
 ; PR5443
 define {i64, i1} @test8(i64 %left, i64 %right) nounwind {
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
index 49abd8a..15fbec5 100644
--- a/test/CodeGen/X86/addr-label-difference.ll
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | grep {__TEXT,__const}
+; RUN: llc %s -o - | grep "__TEXT,__const"
 ; PR5929
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0"
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
index 7715869..eab02cc 100644
--- a/test/CodeGen/X86/aligned-comm.ll
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
-; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep "array,16512,7"
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep "array,16512,7"
 
 ; Darwin 9+ should get alignment on common symbols.
 @array = common global [4128 x i32] zeroinitializer, align 128
diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll
index cc709b5..1f9e85c 100644
--- a/test/CodeGen/X86/alignment-2.ll
+++ b/test/CodeGen/X86/alignment-2.ll
@@ -18,7 +18,9 @@
 define signext i8 @do_lo_list() nounwind optsize ssp {
 bb:
 ; CHECK:     do_lo_list
-; CHECK-NOT: movaps
+; Make sure we do not use movaps for the global variable.
+; It is okay to use movaps for writing the local variable on stack.
+; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}}
   %myopt = alloca %struct.printQueryOpt, align 4
   %tmp = bitcast %struct.printQueryOpt* %myopt to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false)
diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll
index c0f1a18..a45284e 100644
--- a/test/CodeGen/X86/alloca-align-rounding-32.ll
+++ b/test/CodeGen/X86/alloca-align-rounding-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
 
 declare void @bar(<2 x i64>* %n)
 
@@ -6,10 +6,15 @@ define void @foo(i32 %h) {
   %p = alloca <2 x i64>, i32 %h
   call void @bar(<2 x i64>* %p)
   ret void
+; CHECK: foo
+; CHECK-NOT: andl $-32, %eax
 }
 
 define void @foo2(i32 %h) {
   %p = alloca <2 x i64>, i32 %h, align 32
   call void @bar(<2 x i64>* %p)
   ret void
+; CHECK: foo2
+; CHECK: andl $-32, %esp
+; CHECK: andl $-32, %eax
 }
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 3c87dbf..3d76fb0 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
 
 declare void @bar(<2 x i64>* %n)
 
@@ -6,10 +6,15 @@ define void @foo(i64 %h) {
   %p = alloca <2 x i64>, i64 %h
   call void @bar(<2 x i64>* %p)
   ret void
+; CHECK: foo
+; CHECK-NOT: andq $-32, %rax
 }
 
 define void @foo2(i64 %h) {
   %p = alloca <2 x i64>, i64 %h, align 32
   call void @bar(<2 x i64>* %p)
   ret void
+; CHECK: foo2
+; CHECK: andq $-32, %rsp
+; CHECK: andq $-32, %rax
 }
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index a3dc85f..640237d 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding -join-physregs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
 
 ; PR8365
 ; CHECK: andl	$-64, %edi              # encoding: [0x83,0xe7,0xc0]
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/asm-reg-type-mismatch.ll
index f0d46a0..47accdb 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/asm-reg-type-mismatch.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movap
+; RUN: llc < %s -mcpu=core2 | FileCheck %s
 ; PR2715
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@@ -11,8 +10,22 @@ target triple = "x86_64-unknown-linux-gnu"
 	%struct.nsXPTCVariant = type { %struct.nsXPTCMiniVariant, i8*, %struct.nsXPTType, i8 }
 	%struct.nsXPTType = type { %struct.XPTTypeDescriptorPrefix }
 
-define i32 @XPTC_InvokeByIndex(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind {
+define i32 @test1(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind {
 entry:
 	call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},~{dirflag},~{fpsr},~{flags}"( double undef, double undef, double undef, double 1.0, double undef, double 0.0, double undef, double 0.0 ) nounwind
 	ret i32 0
+	; CHECK: test1
+	; CHECK-NOT: movap
+	; CHECK: xorps
+	; CHECK: xorps
+	; CHECK-NOT: movap
+}
+
+define i64 @test2() nounwind {
+entry:
+  %0 = tail call i64 asm sideeffect "movq $1, $0", "={xmm7},*m,~{dirflag},~{fpsr},~{flags}"(i64* null) nounwind
+  ret i64 %0
+  ; CHECK: test2
+	; CHECK: movq {{.*}}, %xmm7
+	; CHECK: movd %xmm7, %rax
 }
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 5942788..19482e1 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=ATOM %s
 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
 
 declare void @use_arr(i8*)
 declare void @many_params(i32, i32, i32, i32, i32, i32)
 
 define void @test1() nounwind {
-; atom: test1:
-; atom: leal -1052(%esp), %esp
-; atom-NOT: sub
-; atom: call
-; atom: leal 1052(%esp), %esp
+; ATOM: test1:
+; ATOM: leal -1052(%esp), %esp
+; ATOM-NOT: sub
+; ATOM: call
+; ATOM: leal 1052(%esp), %esp
 
 ; CHECK: test1:
 ; CHECK: subl
@@ -22,10 +22,10 @@ define void @test1() nounwind {
 }
 
 define void @test2() nounwind {
-; atom: test2:
-; atom: leal -28(%esp), %esp
-; atom: call
-; atom: leal 28(%esp), %esp
+; ATOM: test2:
+; ATOM: leal -28(%esp), %esp
+; ATOM: call
+; ATOM: leal 28(%esp), %esp
 
 ; CHECK: test2:
 ; CHECK-NOT: lea
@@ -34,9 +34,9 @@ define void @test2() nounwind {
 }
 
 define void @test3() nounwind {
-; atom: test3:
-; atom: leal -8(%esp), %esp
-; atom: leal 8(%esp), %esp
+; ATOM: test3:
+; ATOM: leal -8(%esp), %esp
+; ATOM: leal 8(%esp), %esp
 
 ; CHECK: test3:
 ; CHECK-NOT: lea
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index 4dd9a9e..0d97e85 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -1,9 +1,6 @@
-; XFAIL: *
 ; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
 ; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
 ;
-; FIXME: Atom's scheduler is temporarily disabled.
-; XFAIL: *
 
 @a = common global i32 0, align 4
 @b = common global i32 0, align 4
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 7c5abe2..152bece 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 7729491..188efe2 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx  -mattr=+avx | FileCheck %s
 
 ; AVX128 tests:
 
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index b334932..c44beb4 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
 
 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vaesdec
@@ -1154,7 +1154,7 @@ define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestri
-  ; CHECK: movl
+  ; CHECK: seta
   %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1165,7 +1165,7 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestri
-  ; CHECK: movl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1176,7 +1176,7 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestri
-  ; CHECK: movl
+  ; CHECK: seto
   %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1187,7 +1187,7 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestri
-  ; CHECK: movl
+  ; CHECK: sets
   %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1198,7 +1198,7 @@ define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestri
-  ; CHECK: movl
+  ; CHECK: sete
   %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1209,6 +1209,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
   ; CHECK: movl
   ; CHECK: movl
   ; CHECK: vpcmpestrm
+  ; CHECK-NOT: vmov
   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -1226,7 +1227,7 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read
 
 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
-  ; CHECK: movl
+  ; CHECK: seta
   %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1235,7 +1236,7 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea
 
 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
-  ; CHECK: movl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1244,7 +1245,7 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea
 
 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
-  ; CHECK: movl
+  ; CHECK: seto
   %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1253,7 +1254,7 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea
 
 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
-  ; CHECK: movl
+  ; CHECK: sets
   %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1262,7 +1263,7 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea
 
 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
-  ; CHECK: movl
+  ; CHECK: sete
   %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1271,6 +1272,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
 
 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistrm
+  ; CHECK-NOT: vmov
   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -2555,3 +2557,36 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
   ret i32 %tmp
 }
 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+
+; CHECK: movntdq
+define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
+  %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
+  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
+  ret void
+}
+declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
+
+; CHECK: movntps
+define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
+  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
+  ret void
+}
+declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
+
+; CHECK: movntpd
+define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
+  ; add operation forces the execution domain.
+  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
+  ret void
+}
+declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
+
+
+; Check for pclmulqdq
+define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK: vpclmulqdq
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index 7c58820..eff9251 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
 
 ; UNSAFE: maxpd:
 ; UNSAFE: vmaxpd {{.+}}, %xmm
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
index 5268ec3a..e203c4e 100755
--- a/test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -4,5 +4,5 @@ define <4 x i64> @test1(<4 x i64> %a) nounwind {
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret <4 x i64>%b
  ; CHECK: test1:
- ; CHECK: vinsertf128
+ ; CHECK-NOT: vinsertf128
  }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 16c447b..9b41709 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -90,8 +90,8 @@ define i32 @test9(<4 x i32> %a) nounwind {
 ; Extract a value which is the result of an undef mask.
 define i32 @test10(<4 x i32> %a) nounwind {
 ; CHECK: @test10
-; CHECK-NEXT: #
-; CHECK-NEXT: ret
+; CHECK-NOT: {{^[^#]*[a-z]}}
+; CHECK: ret
   %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %r = extractelement <8 x i32> %b, i32 2
   ret i32 %r
@@ -149,17 +149,26 @@ entry:
 }
 
 ; PR12413
+; CHECK: shuf1
+; CHECK: vpshufb
+; CHECK: vpshufb
 ; CHECK: vpshufb
 ; CHECK: vpshufb
+define <32 x i8> @shuf1(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
+ ret <32 x i8> %0
+}
+
+; handle the case where only half of the 256-bits is splittable
+; CHECK: shuf2
 ; CHECK: vpshufb
 ; CHECK: vpshufb
-define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+; CHECK: vpextrb
+; CHECK: vpextrb
+define <32 x i8> @shuf2(<32 x i8> %inval1, <32 x i8> %inval2) {
 entry:
- %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
-i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
-22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
-42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
-62>
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 31, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %0
 }
 
@@ -202,3 +211,40 @@ define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
   %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
   ret <4 x i64> %t
 }
+
+; CHECK: narrow
+; CHECK: vpermilps
+; CHECK: ret
+define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
+  %t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
+  ret <16 x i16> %t
+}
+
+;CHECK: test17
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+define   <8 x float> @test17(<4 x float> %y) {
+  %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %x
+}
+
+; CHECK: test18
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
+  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x float>%S
+}
+
+; CHECK: test19
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
+  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x float>%S
+}
+
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 148ae73..0d403d4 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -112,3 +112,32 @@ entry:
   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   ret <2 x double> %vecinit2.i
 }
+
+; CHECK: _RR
+; CHECK: vbroadcastss (%
+; CHECK: ret
+define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+entry:
+  %q = load float* %ptr, align 4
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  ; force a chain
+  %j = load i32* %k, align 4
+  store i32 %j, i32* undef
+  ret <4 x float> %vecinit6.i
+}
+
+
+; CHECK: _RR2
+; CHECK: vbroadcastss (%
+; CHECK: ret
+define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+entry:
+  %q = load float* %ptr, align 4
+  %v = insertelement <4 x float> undef, float %q, i32 0
+  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %t
+}
+
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
new file mode 100755
index 0000000..b474913
--- /dev/null
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: trunc4
+; CHECK: vpermd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
+  %B = trunc <4 x i64> %A to <4 x i32>
+  ret <4 x i32>%B
+}
+
+; CHECK: trunc8
+; CHECK: vpshufb
+; CHECK-NOT: vinsert
+; CHECK: ret
+
+define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
+  %B = trunc <8 x i32> %A to <8 x i16>
+  ret <8 x i16>%B
+}
+
+; CHECK: sext4
+; CHECK: vpmovsxdq
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i64> @sext4(<4 x i32> %A) nounwind {
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+; CHECK: sext8
+; CHECK: vpmovsxwd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <8 x i32> @sext8(<8 x i16> %A) nounwind {
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+; CHECK: zext4
+; CHECK: vpmovzxdq
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i64> @zext4(<4 x i32> %A) nounwind {
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+; CHECK: zext8
+; CHECK: vpmovzxwd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <8 x i32> @zext8(<8 x i16> %A) nounwind {
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+; CHECK: zext_8i8_8i32
+; CHECK: vpmovzxwd
+; CHECK: vpand
+; CHECK: ret
+define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
+  %B = zext <8 x i8> %A to <8 x i32>  
+  ret <8 x i32>%B
+}
+
+
+
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 3f27a02..a6141b0 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -976,3 +976,182 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   ret void
 }
 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1,
+                     <4 x i32> %idx, <2 x double> %mask) {
+  ; CHECK: vgatherdpd
+  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
+                            i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
+                      <4 x i32>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
+                     <4 x i32> %idx, <4 x double> %mask) {
+  ; CHECK: vgatherdpd
+  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
+                            i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
+                      <4 x i32>, <4 x double>, i8) nounwind readonly
+
+define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
+                     <2 x i64> %idx, <2 x double> %mask) {
+  ; CHECK: vgatherqpd
+  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
+                            i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
+                      <2 x i64>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1,
+                     <4 x i64> %idx, <4 x double> %mask) {
+  ; CHECK: vgatherqpd
+  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
+                            i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
+                      <4 x i64>, <4 x double>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1,
+                     <4 x i32> %idx, <4 x float> %mask) {
+  ; CHECK: vgatherdps
+  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
+                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+                      <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1,
+                     <8 x i32> %idx, <8 x float> %mask) {
+  ; CHECK: vgatherdps
+  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+                            i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
+                      <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
+                     <2 x i64> %idx, <4 x float> %mask) {
+  ; CHECK: vgatherqps
+  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
+                            i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
+                      <2 x i64>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
+                     <4 x i64> %idx, <4 x float> %mask) {
+  ; CHECK: vgatherqps
+  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
+                            i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
+                      <4 x i64>, <4 x float>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
+                     <4 x i32> %idx, <2 x i64> %mask) {
+  ; CHECK: vpgatherdq
+  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
+                            i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
+                      <4 x i32>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
+                     <4 x i32> %idx, <4 x i64> %mask) {
+  ; CHECK: vpgatherdq
+  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
+                            i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
+                      <4 x i32>, <4 x i64>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
+                     <2 x i64> %idx, <2 x i64> %mask) {
+  ; CHECK: vpgatherqq
+  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
+                            i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
+                      <2 x i64>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
+                     <4 x i64> %idx, <4 x i64> %mask) {
+  ; CHECK: vpgatherqq
+  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
+                            i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
+                      <4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
+                     <4 x i32> %idx, <4 x i32> %mask) {
+  ; CHECK: vpgatherdd
+  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
+                            i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
+                      <4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
+                     <8 x i32> %idx, <8 x i32> %mask) {
+  ; CHECK: vpgatherdd
+  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
+                            i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
+                      <8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
+                     <2 x i64> %idx, <4 x i32> %mask) {
+  ; CHECK: vpgatherqd
+  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
+                            i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
+                      <2 x i64>, <4 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
+                     <4 x i64> %idx, <4 x i32> %mask) {
+  ; CHECK: vpgatherqd
+  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
+                            i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
+                      <4 x i64>, <4 x i32>, i8) nounwind readonly
+
+; PR13298
+define <8 x float>  @test_gather_mask(<8 x float> %a0, float* %a,
+                                      <8 x i32> %idx, <8 x float> %mask,
+                                      float* nocapture %out) {
+; CHECK: test_gather_mask
+; CHECK: vmovdqa %ymm2, [[DEST:%.*]]
+; CHECK: vgatherdps [[DEST]]
+;; gather with mask
+  %a_i8 = bitcast float* %a to i8*
+  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+                           i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
+
+;; for debugging, we'll just dump out the mask
+  %out_ptr = bitcast float * %out to <8 x float> *
+  store <8 x float> %mask, <8 x float> * %out_ptr, align 4
+
+  ret <8 x float> %res
+}
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
new file mode 100644
index 0000000..c5899fa
--- /dev/null
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; Make sure that we don't match this shuffle using the vpblendw YMM instruction.
+; The mask for the vpblendw instruction needs to be identical for both halves
+; of the YMM. Need to use two vpblendw instructions.
+
+; CHECK: blendw1
+; CHECK: vpblendw
+; CHECK: vpblendw
+; CHECK: ret
+define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+  ret <16 x i16> %t
+}
+
+; CHECK: vpshufhw $27, %ymm
+define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpshuflw $27, %ymm
+define <16 x i16> @vpshuflw(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 1a78414..b804233 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -160,6 +160,15 @@ entry:
   ret <8 x i32> %g
 }
 
+; CHECK: V113
+; CHECK: vbroadcastss
+; CHECK: ret
+define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
+entry:
+  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
+  ret <8 x float> %g
+}
+
 ; CHECK: _e2
 ; CHECK: vbroadcastss
 ; CHECK: ret
@@ -179,9 +188,170 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
   %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
-  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
-  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
-  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
-  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
+  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
+  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
+  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
   ret <8 x i8> %vecinit7.i
 }
+
+
+define void @crash() nounwind alwaysinline {
+WGLoopsEntry:
+  br i1 undef, label %ret, label %footer329VF
+
+footer329VF:
+  %A.0.inVF = fmul float undef, 6.553600e+04
+  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
+  %A.0VF = fptosi float %A.0.inVF to i32
+  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
+  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %1 = and i32 %A.0VF, 65535
+  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
+  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
+  br i1 undef, label %preload1201VF, label %footer349VF
+
+preload1201VF:
+  br label %footer349VF
+
+footer349VF:
+  %2 = mul nsw <8 x i32> undef, %0
+  %3 = mul nsw <8 x i32> undef, %vector1099VF
+  br label %footer329VF
+
+ret:
+  ret void
+}
+
+; CHECK: _inreg0
+; CHECK: broadcastss
+; CHECK: ret
+define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
+  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
+  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
+  ret <8 x i32> %wide
+}
+
+; CHECK: _inreg1
+; CHECK: broadcastss
+; CHECK: ret
+define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
+  %in = insertelement <8 x float> undef, float %scalar, i32 0
+  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %wide
+}
+
+; CHECK: _inreg2
+; CHECK: broadcastss
+; CHECK: ret
+define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
+  %in = insertelement <4 x float> undef, float %scalar, i32 0
+  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %wide
+}
+
+; CHECK: _inreg3
+; CHECK: broadcastsd
+; CHECK: ret
+define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
+  %in = insertelement <4 x double> undef, double %scalar, i32 0
+  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
+  ret <4 x double> %wide
+}
+
+;CHECK: _inreg8xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
+  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %b
+}
+
+;CHECK: _inreg4xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
+  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %b
+}
+
+;CHECK: _inreg16xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  ret <16 x i16> %b
+}
+
+;CHECK: _inreg8xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %b
+}
+
+
+;CHECK: _inreg4xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  ret <4 x i64> %b
+}
+
+;CHECK: _inreg2xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %b
+}
+
+;CHECK: _inreg4xdouble
+;CHECK: vbroadcastsd
+;CHECK: ret
+define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
+  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
+  ret <4 x double> %b
+}
+
+;CHECK: _inreg2xdouble
+;CHECK: vpbroadcastq
+;CHECK: ret
+define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
+  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %b
+}
+
+;CHECK: _inreg8xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  ret <8 x i32> %b
+}
+
+;CHECK: _inreg4xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %b
+}
+
+;CHECK: _inreg32xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  ret <32 x i8> %b
+}
+
+;CHECK: _inreg16xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %b
+}
diff --git a/test/CodeGen/X86/basic-promote-integers.ll b/test/CodeGen/X86/basic-promote-integers.ll
index c80f2b0..fce6b7f 100644
--- a/test/CodeGen/X86/basic-promote-integers.ll
+++ b/test/CodeGen/X86/basic-promote-integers.ll
@@ -1,7 +1,7 @@
 ; Test that vectors are scalarized/lowered correctly
 ; (with both legalization methods).
-; RUN: llc -march=x86 -promote-elements < %s
-; RUN: llc -march=x86                   < %s
+; RUN: llc -march=x86  < %s
+; RUN: llc -march=x86  < %s
 
 ; A simple test to check copyToParts and copyFromParts.
 
diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll
index 633995d..3c499fa 100644
--- a/test/CodeGen/X86/bigstructret.ll
+++ b/test/CodeGen/X86/bigstructret.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: grep "movl	.24601, 12(%ecx)" %t
-; RUN: grep "movl	.48, 8(%ecx)" %t
-; RUN: grep "movl	.24, 4(%ecx)" %t
-; RUN: grep "movl	.12, (%ecx)" %t
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 %0 = type { i32, i32, i32, i32 }
+%1 = type { i1, i1, i1, i32 }
 
-define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+; CHECK: ReturnBigStruct
+; CHECK: movl $24601, 12(%ecx)
+; CHECK: movl	$48, 8(%ecx)
+; CHECK: movl	$24, 4(%ecx)
+; CHECK: movl	$12, (%ecx)
+
+define fastcc %0 @ReturnBigStruct() nounwind readnone {
 entry:
   %0 = insertvalue %0 zeroinitializer, i32 12, 0
   %1 = insertvalue %0 %0, i32 24, 1
@@ -15,3 +18,17 @@ entry:
   ret %0 %3
 }
 
+; CHECK: ReturnBigStruct2
+; CHECK: movl	$48, 4(%ecx)
+; CHECK: movb	$1, 2(%ecx)
+; CHECK: movb	$1, 1(%ecx)
+; CHECK: movb	$0, (%ecx)
+
+define fastcc %1 @ReturnBigStruct2() nounwind readnone {
+entry:
+  %0 = insertvalue %1 zeroinitializer, i1 false, 0
+  %1 = insertvalue %1 %0, i1 true, 1
+  %2 = insertvalue %1 %1, i1 true, 2
+  %3 = insertvalue %1 %2, i32 48, 3
+  ret %1 %3
+}
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 3a10c70..11f811f 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
 
 
 ; In this test we check that sign-extend of the mask bit is performed by
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index fc7b638..5534712 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
 ; that is not expected to run.
 ; CHECK: test_ifchains:
 ; CHECK: %entry
+; CHECK-NOT: .align
 ; CHECK: %else1
+; CHECK-NOT: .align
 ; CHECK: %else2
+; CHECK-NOT: .align
 ; CHECK: %else3
+; CHECK-NOT: .align
 ; CHECK: %else4
+; CHECK-NOT: .align
 ; CHECK: %exit
 ; CHECK: %then1
 ; CHECK: %then2
@@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
 ; Check that we sink cold loop blocks after the hot loop body.
 ; CHECK: test_loop_cold_blocks:
 ; CHECK: %entry
+; CHECK-NOT: .align
 ; CHECK: %unlikely1
+; CHECK-NOT: .align
 ; CHECK: %unlikely2
+; CHECK: .align
 ; CHECK: %body1
 ; CHECK: %body2
 ; CHECK: %body3
@@ -634,7 +642,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
 ;
 ; CHECK: test_unnatural_cfg_backwards_inner_loop
 ; CHECK: %entry
-; CHECK: %body
+; CHECK: [[BODY:# BB#[0-9]+]]:
 ; CHECK: %loop2b
 ; CHECK: %loop1
 ; CHECK: %loop2a
diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll
new file mode 100644
index 0000000..0cb9fd9
--- /dev/null
+++ b/test/CodeGen/X86/bool-simplify.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s
+
+define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
+  %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+  %t2 = icmp ne i32 %t1, 0
+  %t3 = select i1 %t2, i32 %a, i32 %b
+  ret i32 %t3
+; CHECK: foo
+; CHECK: ptest
+; CHECK-NOT: testl
+; CHECK: cmov
+; CHECK: ret
+}
+
+define i32 @bar(<2 x i64> %c) {
+entry:
+  %0 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %if-true-block, label %endif-block
+if-true-block:                                    ; preds = %entry
+  ret i32 0
+endif-block:                                      ; preds = %entry,
+  ret i32 1
+; CHECK: bar
+; CHECK: ptest
+; CHECK-NOT: testl
+; CHECK: jne
+; CHECK: ret
+}
+
+define i32 @bax(<2 x i64> %c) {
+  %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+  %t2 = icmp eq i32 %t1, 1
+  %t3 = zext i1 %t2 to i32
+  ret i32 %t3
+; CHECK: bax
+; CHECK: ptest
+; CHECK-NOT: cmpl
+; CHECK: ret
+}
+
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
index 2c37194..5223463 100644
--- a/test/CodeGen/X86/br-fold.ll
+++ b/test/CodeGen/X86/br-fold.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=x86-64 < %s | FileCheck %s
 
 ; CHECK: orq
-; CHECK-NEXT: LBB0_1
+; CHECK-NEXT: %bb8.i329
 
 @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
 @_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 93b2043..c942614 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,10 +1,12 @@
 ; Without list-burr scheduling we may not see the difference in codegen here.
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
-; RUN:   grep {%xmm0} %t | count 14
-; RUN:   not grep {%xmm1} %t
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
-; RUN:   grep {%xmm0} %t | count 7
-; RUN:   grep {%xmm1} %t | count 7
+; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
+; breaker requires liveness information to be kept.
+; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
+; RUN:   grep "%xmm0" %t | count 14
+; RUN:   not grep "%xmm1" %t
+; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
+; RUN:   grep "%xmm0" %t | count 7
+; RUN:   grep "%xmm1" %t | count 7
 
 define void @goo(double* %r, double* %p, double* %q) nounwind {
 entry:
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 2dee575..4d80189 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 -mcpu=nehalem | FileCheck %s
 
 define double @t1(float* nocapture %x) nounwind readonly ssp {
 entry:
@@ -34,8 +34,7 @@ entry:
 define double @squirt(double* %x) nounwind {
 entry:
 ; CHECK: squirt:
-; CHECK: movsd ([[A0]]), %xmm0
-; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK: sqrtsd ([[A0]]), %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 3857fb1..38cda4d 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678}
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678}
-; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep "call.*12345678"
 
 ; Call to immediate is not safe on x86-64 unless we *know* that the
 ; call will be within 32-bits pcrel from the dest immediate.
 
-; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax}
+; RUN: llc < %s -march=x86-64 | grep "call.*\*%rax"
 
 ; PR3666
 ; PR3773
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
index 7420ce7..8cdd59e 100644
--- a/test/CodeGen/X86/cfstring.ll
+++ b/test/CodeGen/X86/cfstring.ll
@@ -4,7 +4,7 @@
 %0 = type opaque
 %struct.NSConstantString = type { i32*, i32, i8*, i32 }
 
-; Make sure that the string ends up the the correct section.
+; Make sure that the string ends up the correct section.
 
 ; CHECK:        .section __TEXT,__cstring
 ; CHECK-NEXT: l_.str3:
diff --git a/test/CodeGen/X86/cmov-into-branch.ll b/test/CodeGen/X86/cmov-into-branch.ll
new file mode 100644
index 0000000..780746a
--- /dev/null
+++ b/test/CodeGen/X86/cmov-into-branch.ll
@@ -0,0 +1,63 @@
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s
+
+; cmp with single-use load, should not form cmov.
+define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y)  {
+  %load = load double* %b, align 8
+  %cmp = fcmp olt double %load, %a
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  ret i32 %cond
+; CHECK: test1:
+; CHECK: ucomisd
+; CHECK-NOT: cmov
+; CHECK: j
+; CHECK-NOT: cmov
+}
+
+; Sanity check: no load.
+define i32 @test2(double %a, double %b, i32 %x, i32 %y)  {
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  ret i32 %cond
+; CHECK: test2:
+; CHECK: ucomisd
+; CHECK: cmov
+}
+
+; Multiple uses of %a, should not form cmov.
+define i32 @test3(i32 %a, i32* nocapture %b, i32 %x)  {
+  %load = load i32* %b, align 4
+  %cmp = icmp ult i32 %load, %a
+  %cond = select i1 %cmp, i32 %a, i32 %x
+  ret i32 %cond
+; CHECK: test3:
+; CHECK: cmpl
+; CHECK-NOT: cmov
+; CHECK: j
+; CHECK-NOT: cmov
+}
+
+; Multiple uses of the load.
+define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y)  {
+  %load = load i32* %b, align 4
+  %cmp = icmp ult i32 %load, %a
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  %add = add i32 %cond, %load
+  ret i32 %add
+; CHECK: test4:
+; CHECK: cmpl
+; CHECK: cmov
+}
+
+; Multiple uses of the cmp.
+define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
+  %load = load i32* %b, align 4
+  %cmp = icmp ult i32 %load, %a
+  %cmp1 = icmp ugt i32 %load, %a
+  %cond = select i1 %cmp1, i32 %a, i32 %y
+  %cond5 = select i1 %cmp, i32 %cond, i32 %x
+  ret i32 %cond5
+; CHECK: test5:
+; CHECK: cmpl
+; CHECK: cmov
+; CHECK: cmov
+}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 2e7ffbf..ed25c82 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-cgp-select2branch | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
 ; CHECK: test1:
-; CHECK: btl
-; CHECK-NEXT: movl	$12, %eax
+; CHECK: movl	$12, %eax
+; CHECK-NEXT: btl
 ; CHECK-NEXT: cmovael	(%rcx), %eax
 ; CHECK-NEXT: ret
 
@@ -19,8 +19,8 @@ entry:
 define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
 ; CHECK: test2:
-; CHECK: btl
-; CHECK-NEXT: movl	$12, %eax
+; CHECK: movl	$12, %eax
+; CHECK-NEXT: btl
 ; CHECK-NEXT: cmovbl	(%rcx), %eax
 ; CHECK-NEXT: ret
 
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index ef5e353..eb06327 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -90,3 +90,64 @@ F:
 ; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
 }
 
+; rdar://11866926
+define i32 @test7(i64 %res) nounwind {
+entry:
+; CHECK: test7:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: sete
+  %lnot = icmp ult i64 %res, 4294967296
+  %lnot.ext = zext i1 %lnot to i32
+  ret i32 %lnot.ext
+}
+
+define i32 @test8(i64 %res) nounwind {
+entry:
+; CHECK: test8:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: cmpq $3, %rdi
+  %lnot = icmp ult i64 %res, 12884901888
+  %lnot.ext = zext i1 %lnot to i32
+  ret i32 %lnot.ext
+}
+
+define i32 @test9(i64 %res) nounwind {
+entry:
+; CHECK: test9:
+; CHECK-NOT: movabsq
+; CHECK: shrq $33, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: sete
+  %lnot = icmp ult i64 %res, 8589934592
+  %lnot.ext = zext i1 %lnot to i32
+  ret i32 %lnot.ext
+}
+
+define i32 @test10(i64 %res) nounwind {
+entry:
+; CHECK: test10:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: setne
+  %lnot = icmp uge i64 %res, 4294967296
+  %lnot.ext = zext i1 %lnot to i32
+  ret i32 %lnot.ext
+}
+
+; rdar://9758774
+define i32 @test11(i64 %l) nounwind {
+entry:
+; CHECK: test11:
+; CHECK-NOT: movabsq
+; CHECK-NOT: andq
+; CHECK: shrq $47, %rdi
+; CHECK: cmpq $1, %rdi
+  %shr.mask = and i64 %l, -140737488355328
+  %cmp = icmp eq i64 %shr.mask, 140737488355328
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
index a584876..4004379 100644
--- a/test/CodeGen/X86/coalesce-esp.ll
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl	%esp, %ebp}
+; RUN: llc < %s | grep "movl	%esp, %ebp"
 ; PR4572
 
 ; Don't coalesce with %esp if it would end up putting %esp in
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 6e5c1cf..e45437c 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=nehalem | FileCheck %s
 ; CHECK-NOT:     mov
 ; CHECK:     paddw
 ; CHECK-NOT:     mov
@@ -26,14 +26,3 @@ entry:
 	%tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %tmp10
 }
-
-
-; The coalescer should commute the add to avoid a copy.
-define <4 x float> @test3(<4 x float> %V) {
-entry:
-        %tmp8 = shufflevector <4 x float> %V, <4 x float> undef,
-                                        <4 x i32> < i32 3, i32 2, i32 1, i32 0 >
-        %add = fadd <4 x float> %tmp8, %V
-        ret <4 x float> %add
-}
-
diff --git a/test/CodeGen/X86/coalescer-dce2.ll b/test/CodeGen/X86/coalescer-dce2.ll
new file mode 100644
index 0000000..bbbf09b
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-dce2.ll
@@ -0,0 +1,118 @@
+; RUN: llc < %s -verify-coalescing
+; PR12911
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@h = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@a = common global i16 0, align 2
+@e = common global i32 0, align 4
+
+define void @fn1() nounwind uwtable ssp {
+entry:
+  %0 = load i32* @d, align 4
+  %tobool72 = icmp eq i32 %0, 0
+  br i1 %tobool72, label %for.end32, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %1 = load i32* @c, align 4
+  %tobool2 = icmp eq i32 %1, 0
+  %2 = load i32* @b, align 4
+  %cmp = icmp sgt i32 %2, 0
+  %conv = zext i1 %cmp to i32
+  %3 = load i32* @g, align 4
+  %tobool4 = icmp eq i32 %3, 0
+  %4 = load i16* @a, align 2
+  %tobool9 = icmp eq i16 %4, 0
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond25.loopexit.us-lcssa.us-lcssa, %if.end.us50, %if.end.us, %if.end.us.us, %for.cond1.preheader.lr.ph
+  %j.073 = phi i32 [ undef, %for.cond1.preheader.lr.ph ], [ %j.1.us.us, %if.end.us.us ], [ %j.1.us, %if.end.us ], [ %j.073, %for.cond25.loopexit.us-lcssa.us-lcssa ], [ %j.1.us36, %if.end.us50 ]
+  br i1 %tobool2, label %for.cond1.preheader.split.us, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+
+for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader
+  br i1 %tobool9, label %if.end.us50, label %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+
+for.cond1.preheader.split.us:                     ; preds = %for.cond1.preheader
+  br i1 %tobool9, label %cond.end.us.us, label %cond.end.us
+
+cond.false18.us.us:                               ; preds = %if.end.us.us
+  %5 = load i32* @f, align 4
+  %sext76 = shl i32 %5, 16
+  %phitmp75 = ashr exact i32 %sext76, 16
+  br label %cond.end.us.us
+
+if.end.us.us:                                     ; preds = %cond.end.us.us, %if.then.us.us
+  br i1 %tobool4, label %cond.false18.us.us, label %for.cond1.preheader
+
+if.then.us.us:                                    ; preds = %cond.end.us.us
+  store i32 0, i32* @f, align 4
+  br label %if.end.us.us
+
+cond.end.us.us:                                   ; preds = %cond.false18.us.us, %for.cond1.preheader.split.us
+  %j.1.us.us = phi i32 [ %j.073, %for.cond1.preheader.split.us ], [ %phitmp75, %cond.false18.us.us ]
+  store i32 %conv, i32* @h, align 4
+  br i1 %cmp, label %if.then.us.us, label %if.end.us.us
+
+cond.end21.us:                                    ; preds = %land.lhs.true12.us, %cond.false18.us
+  %cond22.us = phi i16 [ %add.us, %cond.false18.us ], [ %4, %land.lhs.true12.us ]
+  %conv24.us = sext i16 %cond22.us to i32
+  br label %cond.end.us
+
+cond.false18.us:                                  ; preds = %if.end6.us, %land.lhs.true12.us
+  %add.us = add i16 %4, %conv7.us
+  br label %cond.end21.us
+
+land.lhs.true12.us:                               ; preds = %if.end6.us
+  %conv10.us = sext i16 %conv7.us to i32
+  %sub.us = sub nsw i32 0, %conv10.us
+  %cmp14.us = icmp slt i32 %sub.us, 1
+  br i1 %cmp14.us, label %cond.end21.us, label %cond.false18.us
+
+if.end6.us:                                       ; preds = %if.end.us
+  %6 = load i32* @f, align 4
+  %conv7.us = trunc i32 %6 to i16
+  %tobool11.us = icmp eq i16 %conv7.us, 0
+  br i1 %tobool11.us, label %cond.false18.us, label %land.lhs.true12.us
+
+if.end.us:                                        ; preds = %cond.end.us, %if.then.us
+  br i1 %tobool4, label %if.end6.us, label %for.cond1.preheader
+
+if.then.us:                                       ; preds = %cond.end.us
+  store i32 0, i32* @f, align 4
+  br label %if.end.us
+
+cond.end.us:                                      ; preds = %cond.end21.us, %for.cond1.preheader.split.us
+  %j.1.us = phi i32 [ %conv24.us, %cond.end21.us ], [ %j.073, %for.cond1.preheader.split.us ]
+  store i32 %conv, i32* @h, align 4
+  br i1 %cmp, label %if.then.us, label %if.end.us
+
+for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge: ; preds = %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+  br i1 %tobool4, label %if.end6.us65, label %for.cond25.loopexit.us-lcssa.us-lcssa
+
+cond.false18.us40:                                ; preds = %if.end.us50
+  %7 = load i32* @f, align 4
+  %sext = shl i32 %7, 16
+  %phitmp = ashr exact i32 %sext, 16
+  br label %if.end.us50
+
+if.end.us50:                                      ; preds = %cond.false18.us40, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+  %j.1.us36 = phi i32 [ %j.073, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %phitmp, %cond.false18.us40 ]
+  store i32 0, i32* @h, align 4
+  br i1 %tobool4, label %cond.false18.us40, label %for.cond1.preheader
+
+if.end6.us65:                                     ; preds = %if.end6.us65, %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+  store i32 0, i32* @h, align 4
+  br label %if.end6.us65
+
+for.cond25.loopexit.us-lcssa.us-lcssa:            ; preds = %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+  store i32 0, i32* @h, align 4
+  br label %for.cond1.preheader
+
+for.end32:                                        ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/coalescer-identity.ll b/test/CodeGen/X86/coalescer-identity.ll
new file mode 100644
index 0000000..9c72ee6
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-identity.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -verify-coalescing
+; PR12927
+target triple = "x86_64-apple-macosx10.8.0"
+
+; This is a case where removeCopyByCommutingDef() creates an identity copy that
+; joinCopy must then deal with correctly.
+
+@s = common global i16 0, align 2
+@g1 = common global i32 0, align 4
+@g2 = common global i32 0, align 4
+@g0 = common global i32 0, align 4
+
+define void @func() nounwind uwtable ssp {
+for.body.lr.ph:
+  %0 = load i32* @g2, align 4, !tbaa !0
+  %tobool6 = icmp eq i32 %0, 0
+  %s.promoted = load i16* @s, align 2
+  %.pre = load i32* @g1, align 4, !tbaa !0
+  br i1 %tobool6, label %for.body.us, label %for.body
+
+for.body.us:                                      ; preds = %for.body.lr.ph, %for.inc.us
+  %1 = phi i32 [ %3, %for.inc.us ], [ %.pre, %for.body.lr.ph ]
+  %dec13.us = phi i16 [ %dec12.us, %for.inc.us ], [ %s.promoted, %for.body.lr.ph ]
+  %i.011.us = phi i32 [ %inc.us, %for.inc.us ], [ undef, %for.body.lr.ph ]
+  %v.010.us = phi i32 [ %phitmp.us, %for.inc.us ], [ 1, %for.body.lr.ph ]
+  %tobool1.us = icmp ne i32 %v.010.us, 0
+  %2 = zext i1 %tobool1.us to i16
+  %lnot.ext.us = xor i16 %2, 1
+  %add.us = add i16 %dec13.us, %lnot.ext.us
+  %conv3.us = zext i16 %add.us to i32
+  %add4.us = sub i32 0, %1
+  %tobool5.us = icmp eq i32 %conv3.us, %add4.us
+  br i1 %tobool5.us, label %for.inc.us, label %if.then7.us
+
+for.inc.us:                                       ; preds = %cond.end.us, %for.body.us
+  %3 = phi i32 [ %1, %for.body.us ], [ %4, %cond.end.us ]
+  %dec12.us = phi i16 [ %add.us, %for.body.us ], [ %dec.us, %cond.end.us ]
+  %inc.us = add i32 %i.011.us, 1
+  %phitmp.us = udiv i32 %v.010.us, 12
+  %tobool.us = icmp eq i32 %inc.us, 0
+  br i1 %tobool.us, label %for.end, label %for.body.us
+
+cond.end.us:                                      ; preds = %if.then7.us, %cond.false.us
+  %4 = phi i32 [ 0, %cond.false.us ], [ %1, %if.then7.us ]
+  %cond.us = phi i32 [ 0, %cond.false.us ], [ %v.010.us, %if.then7.us ]
+  store i32 %cond.us, i32* @g0, align 4, !tbaa !0
+  br label %for.inc.us
+
+cond.false.us:                                    ; preds = %if.then7.us
+  store i32 0, i32* @g1, align 4, !tbaa !0
+  br label %cond.end.us
+
+if.then7.us:                                      ; preds = %for.body.us
+  %dec.us = add i16 %add.us, -1
+  br i1 %tobool1.us, label %cond.end.us, label %cond.false.us
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %dec13 = phi i16 [ %dec12, %for.body ], [ %s.promoted, %for.body.lr.ph ]
+  %i.011 = phi i32 [ %inc, %for.body ], [ undef, %for.body.lr.ph ]
+  %v.010 = phi i32 [ %phitmp, %for.body ], [ 1, %for.body.lr.ph ]
+  %tobool1 = icmp eq i32 %v.010, 0
+  %lnot.ext = zext i1 %tobool1 to i16
+  %add = add i16 %dec13, %lnot.ext
+  %conv3 = zext i16 %add to i32
+  %add4 = sub i32 0, %.pre
+  %not.tobool5 = icmp ne i32 %conv3, %add4
+  %dec = sext i1 %not.tobool5 to i16
+  %dec12 = add i16 %add, %dec
+  %inc = add i32 %i.011, 1
+  %phitmp = udiv i32 %v.010, 12
+  %tobool = icmp eq i32 %inc, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc.us, %for.body
+  %dec12.lcssa = phi i16 [ %dec12.us, %for.inc.us ], [ %dec12, %for.body ]
+  store i16 %dec12.lcssa, i16* @s, align 2
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
index f979945..26318dd 100644
--- a/test/CodeGen/X86/constant-pool-sharing.ll
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s
 
 ; llc should share constant pool entries between this integer vector
 ; and this floating-point vector since they have the same encoding.
diff --git a/test/CodeGen/X86/constructor.ll b/test/CodeGen/X86/constructor.ll
new file mode 100644
index 0000000..b578896
--- /dev/null
+++ b/test/CodeGen/X86/constructor.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=CTOR %s
+; RUN: llc -mtriple x86_64-pc-linux -use-init-array < %s | FileCheck --check-prefix=INIT-ARRAY %s
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }, { i32, void ()* } { i32 15, void ()* @g }]
+
+define void @f() {
+entry:
+  ret void
+}
+
+define void @g() {
+entry:
+  ret void
+}
+
+; CTOR:		.section	.ctors.65520,"aw",@progbits
+; CTOR-NEXT:	.align	8
+; CTOR-NEXT:	.quad	g
+; CTOR-NEXT:	.section	.ctors,"aw",@progbits
+; CTOR-NEXT:	.align	8
+; CTOR-NEXT:	.quad	f
+
+; INIT-ARRAY:		.section	.init_array.15,"aw",@init_array
+; INIT-ARRAY-NEXT:	.align	8
+; INIT-ARRAY-NEXT:	.quad	g
+; INIT-ARRAY-NEXT:	.section	.init_array,"aw",@init_array
+; INIT-ARRAY-NEXT:	.align	8
+; INIT-ARRAY-NEXT:	.quad	f
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index b82348b..064ee36 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
-; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
 ; STATS: 9 asm-printer
 
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index cf6e27d..9badfc8 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=x86 %s -o -
-; RUN: llc -march=x86-64 %s -o -
+; RUN: llc -march=x86 < %s -verify-machineinstrs
+; RUN: llc -march=x86-64 < %s -verify-machineinstrs
 
 ; PR6497
 
@@ -391,3 +391,54 @@ if.end:
   %t11 = tail call i64 asm sideeffect "foo", "=*m,=A,{bx},{cx},1,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %t6, i32 0, i32 0, i64 0) nounwind
   ret void
 }
+
+; Avoid emitting wrong kill flags from InstrEmitter.
+; InstrEmitter::EmitSubregNode() may steal virtual registers from already
+; emitted blocks when isCoalescableExtInstr points out the opportunity.
+; Make sure kill flags are cleared on the newly global virtual register.
+define i64 @ov_read(i8* %vf, i8* nocapture %buffer, i32 %length, i32 %bigendianp, i32 %word, i32 %sgned, i32* %bitstream) nounwind uwtable ssp {
+entry:
+  br i1 undef, label %return, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br i1 undef, label %if.then3, label %if.end7
+
+if.then3:                                         ; preds = %while.body.preheader
+  %0 = load i32* undef, align 4
+  br i1 undef, label %land.lhs.true.i255, label %if.end7
+
+land.lhs.true.i255:                               ; preds = %if.then3
+  br i1 undef, label %if.then.i256, label %if.end7
+
+if.then.i256:                                     ; preds = %land.lhs.true.i255
+  %sub.i = sub i32 0, %0
+  %conv = sext i32 %sub.i to i64
+  br i1 undef, label %if.end7, label %while.end
+
+if.end7:                                          ; preds = %if.then.i256, %land.lhs.true.i255, %if.then3, %while.body.preheader
+  unreachable
+
+while.end:                                        ; preds = %if.then.i256
+  %cmp18 = icmp sgt i32 %sub.i, 0
+  %.conv = select i1 %cmp18, i64 -131, i64 %conv
+  ret i64 %.conv
+
+return:                                           ; preds = %entry
+  ret i64 -131
+}
+
+; The tail call to a varargs function sets %AL.
+; uitofp expands to an FCMOV instruction which splits the basic block.
+; Make sure the live range of %AL isn't split.
+@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
+define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
+entry:
+  %x7 = load i64* %this, align 8
+  %sub = add i64 %x7, -1
+  %conv = uitofp i64 %sub to float
+  %div = fmul float %conv, 5.000000e-01
+  %conv2 = fpext float %div to double
+  tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
+  ret void
+}
+declare void @_Z6PrintFz(...)
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
index 6406cc7..0a3dfca 100644
--- a/test/CodeGen/X86/ctpop-combine.ll
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
 
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
 
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index c3c7990..af69531 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
 
 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index c35935f..d1e349f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp5
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 28d873b..6b16865 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -1,5 +1,4 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic -join-physregs < %s | FileCheck %s
 
 %struct.a = type { i32 }
 
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index e577ecb..8e7c13d 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -71,3 +71,24 @@ define i32 @test7(i32 %x) nounwind {
 ; CHECK-NOT: shrl
 ; CHECK: ret
 }
+
+; PR13326
+define i8 @test8(i8 %x) nounwind {
+  %div = udiv i8 %x, 78
+  ret i8 %div
+; CHECK: test8:
+; CHECK: shrb %
+; CHECK: imull $211
+; CHECK: shrl $13
+; CHECK: ret
+}
+
+define i8 @test9(i8 %x) nounwind {
+  %div = udiv i8 %x, 116
+  ret i8 %div
+; CHECK: test9:
+; CHECK: shrb $2
+; CHECK: imull $71
+; CHECK: shrl $11
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
new file mode 100644
index 0000000..c5e47fa
--- /dev/null
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
+; rdar://11496434
+
+; no VLAs or dynamic alignment
+define i32 @t1() nounwind uwtable ssp {
+entry:
+  %a = alloca i32, align 4
+  call void @t1_helper(i32* %a) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; CHECK: _t1
+; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
+; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
+; CHECK: callq _t1_helper
+; CHECK: movl [[OFFSET]](%rsp), %eax
+; CHECK: addl $13, %eax
+}
+
+declare void @t1_helper(i32*)
+
+; dynamic realignment
+define i32 @t2() nounwind uwtable ssp {
+entry:
+  %a = alloca i32, align 4
+  %v = alloca <8 x float>, align 32
+  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; CHECK: _t2
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
+; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
+; CHECK: callq _t2_helper
+;
+; CHECK: movq %rbp, %rsp
+; CHECK: popq %rbp
+}
+
+declare void @t2_helper(i32*, <8 x float>*)
+
+; VLAs
+define i32 @t3(i64 %sz) nounwind uwtable ssp {
+entry:
+  %a = alloca i32, align 4
+  %vla = alloca i32, i64 %sz, align 16
+  call void @t3_helper(i32* %a, i32* %vla) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; CHECK: _t3
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %rbx
+; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
+; CHECK: popq %rbx
+; CHECK: popq %rbp
+}
+
+declare void @t3_helper(i32*, i32*)
+
+; VLAs + Dynamic realignment
+define i32 @t4(i64 %sz) nounwind uwtable ssp {
+entry:
+  %a = alloca i32, align 4
+  %v = alloca <8 x float>, align 32
+  %vla = alloca i32, i64 %sz, align 16
+  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; CHECK: _t4
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %r14
+; CHECK: pushq %rbx
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+; CHECK: movq %rsp, %rbx
+;
+; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
+; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
+; CHECK: callq   _t4_helper
+;
+; CHECK: leaq -16(%rbp), %rsp
+; CHECK: popq %rbx
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+declare void @t4_helper(i32*, i32*, <8 x float>*)
+
+; Dynamic realignment + Spill
+define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
+entry:
+  %a = alloca i32, align 4
+  %0 = bitcast float* %f to <8 x float>*
+  %1 = load <8 x float>* %0, align 32
+  call void @t5_helper1(i32* %a) nounwind
+  call void @t5_helper2(<8 x float> %1) nounwind
+  %2 = load i32* %a, align 4
+  %add = add nsw i32 %2, 13
+  ret i32 %add
+
+; CHECK: _t5
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
+; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK: callq   _t5_helper1
+; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: callq   _t5_helper2
+; CHECK: movl {{[0-9]+}}(%rsp), %eax
+;
+; CHECK: movq %rbp, %rsp
+; CHECK: popq %rbp
+}
+
+declare void @t5_helper1(i32*)
+
+declare void @t5_helper2(<8 x float>)
+
+; VLAs + Dynamic realignment + Spill
+; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
+define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
+entry:
+; CHECK: _t6
+  %a = alloca i32, align 4
+  %0 = bitcast float* %f to <8 x float>*
+  %1 = load <8 x float>* %0, align 32
+  %vla = alloca i32, i64 %sz, align 16
+  call void @t6_helper1(i32* %a, i32* %vla) nounwind
+  call void @t6_helper2(<8 x float> %1) nounwind
+  %2 = load i32* %a, align 4
+  %add = add nsw i32 %2, 13
+  ret i32 %add
+}
+
+declare void @t6_helper1(i32*, i32*)
+
+declare void @t6_helper2(<8 x float>)
+
+; VLAs + Dynamic realignment + byval
+; The byval adjust the sp after the prolog, but if we're restoring the sp from
+; the base pointer we use the original adjustment.
+%struct.struct_t = type { [5 x i32] }
+
+define void @t7(i32 %size, %struct.struct_t* byval align 8 %arg1) nounwind uwtable {
+entry:
+  %x = alloca i32, align 32
+  store i32 0, i32* %x, align 32
+  %0 = zext i32 %size to i64
+  %vla = alloca i32, i64 %0, align 16
+  %1 = load i32* %x, align 32
+  call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
+  ret void
+
+; CHECK: _t7
+; CHECK:     pushq %rbp
+; CHECK:     movq %rsp, %rbp
+; CHECK:     pushq %rbx
+; CHECK:     andq $-32, %rsp
+; CHECK:     subq ${{[0-9]+}}, %rsp
+; CHECK:     movq %rsp, %rbx
+
+; Stack adjustment for byval
+; CHECK:     subq {{.*}}, %rsp
+; CHECK:     callq _bar
+; CHECK-NOT: addq {{.*}}, %rsp
+; CHECK:     leaq -8(%rbp), %rsp
+; CHECK:     popq %rbx
+; CHECK:     popq %rbp
+}
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @bar(i32, i32*, %struct.struct_t* byval align 8)
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+
+; Test when forcing stack alignment
+define i32 @t8() nounwind uwtable {
+entry:
+  %a = alloca i32, align 4
+  call void @t1_helper(i32* %a) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; FORCE-ALIGN: _t8
+; FORCE-ALIGN:      movq %rsp, %rbp
+; FORCE-ALIGN:      andq $-32, %rsp
+; FORCE-ALIGN-NEXT: subq $32, %rsp
+; FORCE-ALIGN:      movq %rbp, %rsp
+; FORCE-ALIGN:      popq %rbp
+}
+
+; VLAs
+define i32 @t9(i64 %sz) nounwind uwtable {
+entry:
+  %a = alloca i32, align 4
+  %vla = alloca i32, i64 %sz, align 16
+  call void @t3_helper(i32* %a, i32* %vla) nounwind
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 13
+  ret i32 %add
+
+; FORCE-ALIGN: _t9
+; FORCE-ALIGN: pushq %rbp
+; FORCE-ALIGN: movq %rsp, %rbp
+; FORCE-ALIGN: pushq %rbx
+; FORCE-ALIGN: andq $-32, %rsp
+; FORCE-ALIGN: subq $32, %rsp
+; FORCE-ALIGN: movq %rsp, %rbx
+
+; FORCE-ALIGN: leaq -8(%rbp), %rsp
+; FORCE-ALIGN: popq %rbx
+; FORCE-ALIGN: popq %rbp
+}
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
new file mode 100644
index 0000000..7883ffa
--- /dev/null
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: mm2
+define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  br label %do.body
+
+; CHECK: do.body
+; Loop body has no branches before the backedge.
+; CHECK-NOT: LBB
+do.body:
+  %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
+  %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
+  %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
+  %0 = load i32* %p.addr.0, align 4
+  %cmp = icmp sgt i32 %0, %max.0
+  br i1 %cmp, label %do.cond, label %if.else
+
+if.else:
+  %cmp1 = icmp slt i32 %0, %min.0
+  %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
+  br label %do.cond
+
+do.cond:
+  %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
+  %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
+; CHECK: decl %esi
+; CHECK: jne LBB
+  %dec = add i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:
+  %sub = sub nsw i32 %max.1, %min.1
+  ret i32 %sub
+}
+
+; CHECK: multipreds
+; Deal with alternative tail predecessors
+; CHECK-NOT: LBB
+; CHECK: cmov
+; CHECK-NOT: LBB
+; CHECK: cmov
+; CHECK-NOT: LBB
+; CHECK: fprintf
+
+define void @multipreds(i32 %sw) nounwind uwtable ssp {
+entry:
+  switch i32 %sw, label %if.then29 [
+    i32 0, label %if.then37
+    i32 127, label %if.end41
+  ]
+
+if.then29:
+  br label %if.end41
+
+if.then37:
+  br label %if.end41
+
+if.end41:
+  %exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ]
+  call void (...)* @fprintf(i32 %exit_status.0) nounwind
+  unreachable
+}
+
+declare void @fprintf(...) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 0f16a64..090680e 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
-; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+
+; CHECK-NOT: lea{{.*}}(%esp)
+; CHECK: {{(mov.* %ebp, %esp)|(lea.*\(%ebp\), %esp)}}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 14778f0..9e1a375 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mcpu=penryn > %t
 ; RUN: not grep movd %t
-; RUN: grep {movss	%xmm} %t | count 1
-; RUN: grep {extractps	\\\$1, %xmm0, } %t | count 1
+; RUN: grep "movss	%xmm" %t | count 1
+; RUN: grep "extractps	\$1, %xmm0, " %t | count 1
 ; PR2647
 
 external global float, align 16         ; <float*>:0 [#uses=2]
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 9ded7e0..af1867f 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,28 +1,54 @@
 ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
-; RUN:   count 2
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
-; RUN:   grep fabs\$ | count 3
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse2,-sse3,-sse | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=UNSAFE
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -O0 | FileCheck %s --check-prefix=NOOPT
 
 declare float @fabsf(float)
 
 declare x86_fp80 @fabsl(x86_fp80)
 
+; CHECK:  test1:
+; UNSAFE: test1:
+; NOOPT:  test1:
 define float @test1(float %X) {
-        %Y = call float @fabsf(float %X)
+        %Y = call float @fabsf(float %X) readnone
         ret float %Y
 }
+; CHECK:  {{^[ \t]+fabs$}}
+; UNSAFE: {{^[ \t]+fabs$}}
 
+; CHECK-NOT:  fabs
+; UNSAFE-NOT: fabs
+; NOOPT-NOT:  fabsf
+
+; CHECK:  test2:
+; UNSAFE: test2:
+; NOOPT:  test2:
 define double @test2(double %X) {
         %Y = fcmp oge double %X, -0.0
         %Z = fsub double -0.0, %X
         %Q = select i1 %Y, double %X, double %Z
         ret double %Q
 }
+; fabs is not used here.
+; CHECK-NOT:  fabs
+; NOOPT-NOT:  fabs
+
+; UNSAFE: {{^[ \t]+fabs$}}
 
+; UNSAFE-NOT: fabs
+
+; CHECK:  test3:
+; UNSAFE: test3:
+; NOOPT:  test3:
 define x86_fp80 @test3(x86_fp80 %X) {
-        %Y = call x86_fp80 @fabsl(x86_fp80 %X)
+        %Y = call x86_fp80 @fabsl(x86_fp80 %X) readnone
         ret x86_fp80 %Y
 }
+; CHECK:  {{^[ \t]+fabs$}}
+; UNSAFE: {{^[ \t]+fabs$}}
+; NOOPT:  {{^[ \t]+fabs$}}
 
-
+; CHECK-NOT:  fabs
+; UNSAFE-NOT: fabs
+; NOOPT-NOT:  fabs
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e4982f0..14cb136 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {add	ESP, 8}
+; RUN:   grep "add	ESP, 8"
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index 323c853..b3adb80 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel | grep {LCPI0_0(%rip)}
+; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)"
 ; Make sure fast isel uses rip-relative addressing when required.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index 34f8b38..cb2464e 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
+; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)"
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 @f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 8db1936..52b1e85 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=generic | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 @src = external global i32
 
@@ -18,6 +19,13 @@ entry:
 ; CHECK: 	movl	%eax, (%ecx)
 ; CHECK: 	ret
 
+; ATOM:	loadgv:
+; ATOM:		movl    L_src$non_lazy_ptr, %ecx
+; ATOM:         movl    (%ecx), %eax
+; ATOM:         addl    (%ecx), %eax
+; ATOM:         movl    %eax, (%ecx)
+; ATOM:         ret
+
 }
 
 %stuff = type { i32 (...)** }
@@ -31,4 +39,8 @@ entry:
 ; CHECK:	movl	$0, %eax
 ; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
 
+; ATOM: _t:
+; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %ecx
+; ATOM:         movl    $0, %eax
+
 }
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index b9598bb..19f3888 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -46,3 +46,17 @@ entry:
 ; CHECK: addl $40
 }
 declare void @test3sret(%struct.a* sret)
+
+; Check that fast-isel sret works with fastcc (and does not callee-pop)
+define void @test4() nounwind ssp {
+entry:
+  %tmp = alloca %struct.a, align 8
+  call fastcc void @test4fastccsret(%struct.a* sret %tmp)
+  ret void
+; CHECK: test4:
+; CHECK: subl $28
+; CHECK: leal (%esp), %ecx
+; CHECK: calll _test4fastccsret
+; CHECK addl $28
+}
+declare fastcc void @test4fastccsret(%struct.a* sret)
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index c88d529..132df2b 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -mtriple=x86_64-apple-darwin10
 
 ; This tests very minimal fast-isel functionality.
 
@@ -117,3 +117,11 @@ define i64* @life() nounwind {
   ret i64* %a3
 }
 
+declare void @llvm.donothing() readnone
+
+; CHECK: donada
+define void @donada() nounwind {
+entry:
+  call void @llvm.donothing()
+  ret void
+}
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index 52b3e57..f1204d6 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2
 ; PR3122
 ; rdar://6400815
 
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index 5deedb9..b0c1d0a 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+fma  | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=i386-apple-darwin10               | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10             | FileCheck %s --check-prefix=CHECK-FMA-CALL
 
 ; CHECK: test_f32
-; CHECK: _fmaf
+; CHECK-FMA-INST: vfmadd213ss
+; CHECK-FMA-CALL: _fmaf
 
 define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
 entry:
@@ -11,7 +14,8 @@ entry:
 }
 
 ; CHECK: test_f64
-; CHECK: _fma
+; CHECK-FMA-INST: vfmadd213sd
+; CHECK-FMA-CALL: _fma
 
 define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
new file mode 100755
index 0000000..90529e0
--- /dev/null
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s
+
+define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fmadd213ss %xmm
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fmadd213ps
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm
+  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fnmadd213ss %xmm
+  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fnmadd213ps
+  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm
+  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fmsub213ss
+  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fmsub213ps
+  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fnmsub213ss
+  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: fnmsub213ps
+  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+;;;;
+
+define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fmadd213sd
+  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fmadd213pd
+  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fnmadd213sd
+  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fnmadd213pd
+  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+
+
+define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fmsub213sd
+  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fmsub213pd
+  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fnmsub213sd
+  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: fnmsub213pd
+  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 5ed03ef..fd414b3 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -1,295 +1,295 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
 
 ; VFMADD
-define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddss
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
   ; CHECK: vfmaddss (%{{.*}})
   %x = load float *%a2
   %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
   %x = load float *%a1
   %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddsd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
   ; CHECK: vfmaddsd (%{{.*}})
   %x = load double *%a2
   %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
   %x = load double *%a1
   %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddps
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
   ; CHECK: vfmaddps (%{{.*}})
   %x = load <4 x float>* %a2
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
   %x = load <4 x float>* %a1
-  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
   ; CHECK: vfmaddpd (%{{.*}})
   %x = load <2 x double>* %a2
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
   %x = load <2 x double>* %a1
-  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfmaddps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfmaddpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
 
 ; VFMSUB
-define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmsubss
-  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmsubsd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmsubps
-  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmsubpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfmsubps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfmsubpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
 
 ; VFNMADD
-define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfnmaddss
-  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfnmaddsd
-  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfnmaddps
-  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfnmaddpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfnmaddps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfnmaddpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
 
 ; VFNMSUB
-define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfnmsubss
-  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfnmsubsd
-  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfnmsubps
-  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfnmsubpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfnmsubps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfnmsubpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
 
 ; VFMADDSUB
-define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddsubps
-  %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddsubpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfmaddsubps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfmaddsubpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
 
 ; VFMSUBADD
-define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
   ; CHECK: vfmsubaddps
-  %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
-declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmsubaddpd
-  %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
-define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfmsubaddps
   ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
   ret < 8 x float > %res
 }
-declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
 
-define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
   ; CHECK: vfmsubaddpd
   ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
   ret < 4 x double > %res
 }
-declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
new file mode 100644
index 0000000..5d97a87
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+
+; CHECK: test_x86_fmadd_ps
+; CHECK: vfmadd213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = fmul <4 x float> %a0, %a1
+  %res = fadd <4 x float> %x, %a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps
+; CHECK: fmsub213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = fmul <4 x float> %a0, %a1
+  %res = fsub <4 x float> %x, %a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps
+; CHECK: fnmadd213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = fmul <4 x float> %a0, %a1
+  %res = fsub <4 x float> %a2, %x
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps
+; CHECK: fnmsub213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = fmul <4 x float> %a0, %a1
+  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <4 x float> %y, %a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmadd_ps_y
+; CHECK: vfmadd213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  %x = fmul <8 x float> %a0, %a1
+  %res = fadd <8 x float> %x, %a2
+  ret <8 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps_y
+; CHECK: vfmsub213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  %x = fmul <8 x float> %a0, %a1
+  %res = fsub <8 x float> %x, %a2
+  ret <8 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps_y
+; CHECK: vfnmadd213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  %x = fmul <8 x float> %a0, %a1
+  %res = fsub <8 x float> %a2, %x
+  ret <8 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps_y
+; CHECK: vfnmsub213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  %x = fmul <8 x float> %a0, %a1
+  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <8 x float> %y, %a2
+  ret <8 x float> %res
+}
+
+; CHECK: test_x86_fmadd_pd_y
+; CHECK: vfmadd213pd     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  %x = fmul <4 x double> %a0, %a1
+  %res = fadd <4 x double> %x, %a2
+  ret <4 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd_y
+; CHECK: vfmsub213pd     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  %x = fmul <4 x double> %a0, %a1
+  %res = fsub <4 x double> %x, %a2
+  ret <4 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd
+; CHECK: vfmsub213pd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  %x = fmul <2 x double> %a0, %a1
+  %res = fsub <2 x double> %x, %a2
+  ret <2 x double> %res
+}
+
+; CHECK: test_x86_fnmadd_ss
+; CHECK: vfnmadd213ss    %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
+  %x = fmul float %a0, %a1
+  %res = fsub float %a2, %x
+  ret float %res
+}
+
+; CHECK: test_x86_fnmadd_sd
+; CHECK: vfnmadd213sd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
+  %x = fmul double %a0, %a1
+  %res = fsub double %a2, %x
+  ret double %res
+}
+
+; CHECK: test_x86_fmsub_sd
+; CHECK: vfmsub213sd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
+  %x = fmul double %a0, %a1
+  %res = fsub double %x, %a2
+  ret double %res
+}
+
+; CHECK: test_x86_fnmsub_ss
+; CHECK: vfnmsub213ss     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
+  %x = fsub float -0.000000e+00, %a0
+  %y = fmul float %x, %a1
+  %res = fsub float %y, %a2
+  ret float %res
+}
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index e03cb7e..c961f75 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -45,3 +45,29 @@ L:
 
 }
 
+; rdar://10554090
+; xor in exit block will be CSE'ed and load will be folded to xor in entry.
+define i1 @test3(i32* %P, i32* %Q) nounwind {
+; CHECK: test3:
+; CHECK: movl 8(%esp), %eax
+; CHECK: xorl (%eax),
+; CHECK: j
+; CHECK-NOT: xor
+entry:
+  %0 = load i32* %P, align 4
+  %1 = load i32* %Q, align 4
+  %2 = xor i32 %0, %1
+  %3 = and i32 %2, 65535
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %exit, label %land.end
+
+exit:
+  %shr.i.i19 = xor i32 %1, %0
+  %5 = and i32 %shr.i.i19, 2147418112
+  %6 = icmp eq i32 %5, 0
+  br label %land.end
+
+land.end:
+  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
+  ret i1 %7
+}
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index cc4198d..d850630 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,11 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pcmpeqd %t | count 1
-; RUN: grep xor %t | count 1
-; RUN: not grep LCP %t
+; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
 
 define <2 x double> @foo() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
+; CHECK: foo:
+; CHECK: pcmpeqd %xmm0, %xmm0
+; CHECK-NOT: %xmm
+; CHECK: ret
 }
 define <2 x double> @bar() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
+; CHECK: bar:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NOT: %xmm
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
new file mode 100644
index 0000000..2ada194
--- /dev/null
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -0,0 +1,70 @@
+; This test is attempting to detect when we request forced re-alignment of the
+; stack to an alignment greater than would be available due to the ABI. We
+; arbitrarily force alignment up to 32-bytes for i386 hoping that this will
+; exceed any ABI provisions.
+;
+; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define i32 @f(i8* %p) nounwind {
+entry:
+  %0 = load i8* %p
+  %conv = sext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @g(i32 %i) nounwind {
+; CHECK: g:
+; CHECK:      pushl  %ebp
+; CHECK-NEXT: movl   %esp, %ebp
+; CHECK-NEXT: pushl
+; CHECK-NEXT: pushl
+; CHECK-NEXT: andl   $-32, %esp
+; CHECK-NEXT: subl   $32, %esp
+;
+; Now setup the base pointer (%esi).
+; CHECK-NEXT: movl   %esp, %esi
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+;
+; The next adjustment of the stack is due to the alloca.
+; CHECK:      movl   %{{...}}, %esp
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+;
+; Next we set up the memset call, and then undo it.
+; CHECK:      subl   $32, %esp
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+; CHECK:      calll  memset
+; CHECK-NEXT: addl   $32, %esp
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+;
+; Next we set up the call to 'f'.
+; CHECK:      subl   $32, %esp
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+; CHECK:      calll  f
+; CHECK-NEXT: addl   $32, %esp
+; CHECK-NOT:         {{[^ ,]*}}, %esp
+;
+; Restore %esp from %ebp (frame pointer) and subtract the size of
+; zone with callee-saved registers to pop them.
+; This is the state prior to stack realignment and the allocation of VLAs.
+; CHECK-NOT:  popl
+; CHECK:      leal   -8(%ebp), %esp
+; CHECK-NEXT: popl
+; CHECK-NEXT: popl
+; CHECK-NEXT: popl   %ebp
+; CHECK-NEXT: ret
+
+entry:
+  br label %if.then
+
+if.then:
+  %0 = alloca i8, i32 %i
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 %i, i32 1, i1 false)
+  %call = call i32 @f(i8* %0)
+  %conv = sext i32 %call to i64
+  ret i64 %conv
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index cafc61a..62d8100 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,7 +1,7 @@
 ;; Test that this FP immediate is stored in the constant pool as a float.
 
 ; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
-; RUN:   grep {.long.1123418112}
+; RUN:   grep ".long.1123418112"
 
 define double @D() {
         ret double 1.230000e+02
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 6966cf0..1f5121d 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-macosx -mcpu=yonah | FileCheck %s
 ; CHECK-NOT:     {{((xor|and)ps|movd)}}
 
 ; These operations should be done in integer registers, eliminating constant
diff --git a/test/CodeGen/X86/fp-stack-compare-cmov.ll b/test/CodeGen/X86/fp-stack-compare-cmov.ll
new file mode 100644
index 0000000..b457fbc
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-compare-cmov.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
+; PR1012
+
+define float @foo(float* %col.2.0) {
+; CHECK: fucompi
+; CHECK: fcmov
+  %tmp = load float* %col.2.0
+  %tmp16 = fcmp olt float %tmp, 0.000000e+00
+  %tmp20 = fsub float -0.000000e+00, %tmp
+  %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
+  ret float %iftmp.2.0
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index f3998b6..a8557ad 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,8 +1,11 @@
 ; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s
-; PR1012
+; PR6679
 
 define float @foo(float* %col.2.0) {
-; CHECK: fucompi
+; CHECK: fucomp
+; CHECK-NOT: fucompi
+; CHECK: j
+; CHECK-NOT: fcmov
   %tmp = load float* %col.2.0
   %tmp16 = fcmp olt float %tmp, 0.000000e+00
   %tmp20 = fsub float -0.000000e+00, %tmp
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 1307f70..2733117 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -22,7 +22,7 @@ define fastcc double @test2(<2 x double> %A) {
 
 ; CHECK: test3
 ; CHECK: sub{{.*}}%esp
-; CHECLK-NOT: xmm
+; CHECK-NOT: xmm
 define fastcc double @test3(<4 x float> %A) {
 	%B = bitcast <4 x float> %A to <2 x double>
 	%C = call fastcc double @test2(<2 x double> %B)
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 0145069..a2cea5e 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
+; RUN:   grep -i ST | not grep "fadd\|fsub\|fdiv\|fmul"
 
 ; Test that the load of the memory location is folded into the operation.
 
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index ff9b1b0..1344cdc 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,9 +1,17 @@
-; RUN: llc < %s -march=x86 >%t
-
-; RUN: grep {addl	\\\$4,} %t | count 3
-; RUN: not grep {,%} %t
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+; ATOM: foo
+; ATOM: addl
+; ATOM: leal
+; ATOM: leal
+
+; CHECK: foo
+; CHECK: addl
+; CHECK: addl
+; CEHCK: addl
+
 entry:
 	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
 	br i1 %0, label %bb, label %return
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 4a6927f..72a5096 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
 ; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
diff --git a/test/CodeGen/X86/gs-fold.ll b/test/CodeGen/X86/gs-fold.ll
new file mode 100644
index 0000000..dbec76b
--- /dev/null
+++ b/test/CodeGen/X86/gs-fold.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd | FileCheck %s --check-prefix=CHECK-FBSD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK-LINUX
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.thread = type { i32, i32, i32, i32 }
+
+define i32 @test() nounwind uwtable {
+entry:
+  %0 = load volatile %struct.thread* addrspace(256)* null
+  %c = getelementptr inbounds %struct.thread* %0, i64 0, i32 2
+  %1 = load i32* %c, align 4
+  ret i32 %1
+}
+
+; Check that we are not assuming that gs contains the address of gs if we are not targeting Linux
+; CHECK-FBSD: movq	%gs:0, %rax
+; CHECK-FBSD: movl	8(%rax), %eax
+; Check that we are assuming that gs contains the address of gs if we are targeting Linux
+; CHECK-LINUX: movl	%gs:8, %eax
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 76ffd66..968a9e8 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {movzbl	%\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86 | grep "movzbl	%[abcd]h," | count 7
 
 ; Use h-register extract and zero-extend.
 
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index 98817f3..a19fca5 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86-64 | grep "movzbl	%[abcd]h," | count 7
 
 ; Use h-register extract and zero-extend.
 
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 402cdfe..903c453 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-linux > %t
-; RUN: grep {movzbl	%\[abcd\]h,} %t | count 8
-; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
+; RUN: grep "movzbl	%[abcd]h," %t | count 8
+; RUN: grep "%[abcd]h" %t | not grep "%r[[:digit:]]*d"
 
 ; LLVM creates virtual registers for values live across blocks
 ; based on the type of the value. Make sure that the extracts
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
index 4289fa7..74ecd04 100644
--- a/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats -O2 |& grep "1 machine-licm"
+; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm"
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.2"
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index a8ba015..9196cce 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,13 +1,17 @@
-; RUN: llc < %s -march=x86-64 -stats  |& \
-; RUN:   grep {5 .*Number of machine instrs printed}
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 ;; Integer absolute value, should produce something at least as good as:
-;;       movl %edi, %ecx
-;;       sarl $31, %ecx
-;;       leal (%rdi,%rcx), %eax
-;;       xorl %ecx, %eax
+;;       movl   %edi, %eax
+;;       negl   %eax
+;;       cmovll %edi, %eax
 ;;       ret
+; rdar://10695237
 define i32 @test(i32 %a) nounwind {
+; CHECK: test:
+; CHECK: mov
+; CHECK-NEXT: neg
+; CHECK-NEXT: cmov
+; CHECK-NEXT: ret
         %tmp1neg = sub i32 0, %a
         %b = icmp sgt i32 %a, -1
         %abs = select i1 %b, i32 %a, i32 %tmp1neg
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index cecf77a..62a21f4 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm3, %xmm1}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm2, %xmm0}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm3, %xmm1}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd	%xmm3, %xmm1"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd	%xmm2, %xmm0"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps	%xmm3, %xmm1"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps	%xmm2, %xmm0"
 
 define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
   %y = fmul <4 x double> %x, %z
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
index 134d6e9..747a589 100644
--- a/test/CodeGen/X86/inline-asm-error.ll
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march x86 -regalloc=fast       < %s 2> %t1
+; RUN: not llc -march x86 -regalloc=fast -optimize-regalloc=0 < %s 2> %t1
 ; RUN: not llc -march x86 -regalloc=basic      < %s 2> %t2
 ; RUN: not llc -march x86 -regalloc=greedy     < %s 2> %t3
 ; RUN: FileCheck %s < %t1
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
index 5e76b6c..b069c46 100644
--- a/test/CodeGen/X86/inline-asm-modifier-n.ll
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep { 37}
+; RUN: llc < %s -march=x86 | grep " 37"
 ; rdar://7008959
 
 define void @bork() nounwind {
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index eef6c2f..e6eb9ef 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -43,3 +43,12 @@ entry:
   %0 = tail call i8 asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %p, i1 %desired) nounwind
   ret void
 }
+
+; <rdar://problem/11542429>
+; The constrained GR32_ABCD register class of the 'q' constraint requires
+; special handling after the preceding outputs used up eax-edx.
+define void @constrain_abcd(i8* %h) nounwind ssp {
+entry:
+  %0 = call { i32, i32, i32, i32, i32 } asm sideeffect "", "=&r,=&r,=&r,=&r,=&q,r,~{ecx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %h) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/inreg.ll b/test/CodeGen/X86/inreg.ll
new file mode 100644
index 0000000..6653cfb
--- /dev/null
+++ b/test/CodeGen/X86/inreg.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 | FileCheck --check-prefix=DAG %s
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 -O0 | FileCheck --check-prefix=FAST %s
+
+%struct.s1 = type { double, float }
+
+define void @g1() nounwind {
+entry:
+  %tmp = alloca %struct.s1, align 4
+  call void @f(%struct.s1* inreg sret %tmp, i32 inreg 41, i32 inreg 42, i32 43)
+  ret void
+  ; DAG: g1:
+  ; DAG: subl $[[AMT:.*]], %esp
+  ; DAG-NEXT: $43, (%esp)
+  ; DAG-NEXT: leal    16(%esp), %eax
+  ; DAG-NEXT: movl    $41, %edx
+  ; DAG-NEXT: movl    $42, %ecx
+  ; DAG-NEXT: calll   f
+  ; DAG-NEXT: addl $[[AMT]], %esp
+  ; DAG-NEXT: ret
+
+  ; FAST: g1:
+  ; FAST: subl $[[AMT:.*]], %esp
+  ; FAST-NEXT: leal    8(%esp), %eax
+  ; FAST-NEXT: movl    $41, %edx
+  ; FAST-NEXT: movl    $42, %ecx
+  ; FAST: $43, (%esp)
+  ; FAST: calll   f
+  ; FAST-NEXT: addl $[[AMT]], %esp
+  ; FAST: ret
+}
+
+declare void @f(%struct.s1* inreg sret, i32 inreg, i32 inreg, i32)
+
+%struct.s2 = type {}
+
+define void @g2(%struct.s2* inreg sret %agg.result) nounwind {
+entry:
+  ret void
+  ; DAG: g2
+  ; DAG-NOT: ret $4
+  ; DAG: .size g2
+
+  ; FAST: g2
+  ; FAST-NOT: ret $4
+  ; FAST: .size g2
+}
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index 5ed0e00..b162666 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movb.7(%...)} %t
+; RUN: grep "movb.7(%...)" %t
 ; RUN: not grep leal %t
 
 define i8 @test(i32 *%P) nounwind {
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
index 8adf723..b1d1a20 100644
--- a/test/CodeGen/X86/ispositive.ll
+++ b/test/CodeGen/X86/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {shrl.*31}
+; RUN: llc < %s -march=x86 | grep "shrl.*31"
 
 define i32 @test1(i32 %X) {
 entry:
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index dbd133c..48e2106 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -22,6 +22,7 @@ declare i32 @bar(...)
 declare i32 @baz(...)
 
 ; rdar://10633221
+; rdar://11355268
 define i32 @g(i32 %a, i32 %b) nounwind {
 entry:
 ; CHECK: g:
@@ -32,3 +33,223 @@ entry:
   %cond = select i1 %cmp, i32 %sub, i32 0
   ret i32 %cond
 }
+
+; rdar://10734411
+define i32 @h(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: h:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+  %cmp = icmp slt i32 %b, %a
+  %sub = sub nsw i32 %a, %b
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
+define i32 @i(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: i:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
+define i32 @j(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: j:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+  %cmp = icmp ugt i32 %a, %b
+  %sub = sub i32 %a, %b
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
+define i32 @k(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: k:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+  %cmp = icmp ult i32 %b, %a
+  %sub = sub i32 %a, %b
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
+; redundant cmp instruction
+define i32 @l(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l:
+; CHECK-NOT: cmp
+  %cmp = icmp slt i32 %b, %a
+  %sub = sub nsw i32 %a, %b
+  %cond = select i1 %cmp, i32 %sub, i32 %a
+  ret i32 %cond
+}
+define i32 @m(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: m:
+; CHECK-NOT: cmp
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %cond = select i1 %cmp, i32 %b, i32 %sub
+  ret i32 %cond
+}
+; If EFLAGS is live-out, we can't remove cmp if there exists
+; a swapped sub.
+define i32 @l2(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l2:
+; CHECK: cmp
+  %cmp = icmp eq i32 %b, %a
+  %sub = sub nsw i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %cmp2 = icmp sgt i32 %b, %a
+  %sel = select i1 %cmp2, i32 %sub, i32 %a
+  ret i32 %sel
+
+if.else:
+  ret i32 %sub
+}
+define i32 @l3(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l3:
+; CHECK: sub
+; CHECK-NOT: cmp
+; CHECK: jge
+  %cmp = icmp sgt i32 %b, %a
+  %sub = sub nsw i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  ret i32 %sub
+
+if.else:
+  %add = add nsw i32 %sub, 1
+  ret i32 %add
+}
+; rdar://11830760
+; When Movr0 is between sub and cmp, we need to move "Movr0" before sub.
+define i32 @l4(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l4:
+; CHECK: xor
+; CHECK: sub
+; CHECK-NOT: cmp
+  %cmp = icmp sgt i32 %b, %a
+  %sub = sub i32 %a, %b
+  %.sub = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %.sub
+}
+; rdar://11540023
+define i32 @n(i32 %x, i32 %y) nounwind {
+entry:
+; CHECK: n:
+; CHECK-NOT: sub
+; CHECK: cmp
+  %sub = sub nsw i32 %x, %y
+  %cmp = icmp slt i32 %sub, 0
+  %y.x = select i1 %cmp, i32 %y, i32 %x
+  ret i32 %y.x
+}
+; PR://13046
+define void @o() nounwind uwtable {
+entry:
+  %0 = load i16* undef, align 2
+  br i1 undef, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %entry
+  unreachable
+
+if.end.i:                                         ; preds = %entry
+  br i1 undef, label %sw.bb, label %sw.default
+
+sw.bb:                                            ; preds = %if.end.i
+  br i1 undef, label %if.then44, label %if.end29
+
+if.end29:                                         ; preds = %sw.bb
+; CHECK: o:
+; CHECK: cmp
+  %1 = urem i16 %0, 10
+  %cmp25 = icmp eq i16 %1, 0
+  %. = select i1 %cmp25, i16 2, i16 0
+  br i1 %cmp25, label %if.then44, label %sw.default
+
+sw.default:                                       ; preds = %if.end29, %if.end.i
+  br i1 undef, label %if.then.i96, label %if.else.i97
+
+if.then.i96:                                      ; preds = %sw.default
+  unreachable
+
+if.else.i97:                                      ; preds = %sw.default
+  unreachable
+
+if.then44:                                        ; preds = %if.end29, %sw.bb
+  %aModeRefSel.1.ph = phi i16 [ %., %if.end29 ], [ 3, %sw.bb ]
+  br i1 undef, label %if.then.i103, label %if.else.i104
+
+if.then.i103:                                     ; preds = %if.then44
+  unreachable
+
+if.else.i104:                                     ; preds = %if.then44
+  ret void
+}
+; rdar://11855129
+define i32 @p(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: p:
+; CHECK-NOT: test
+; CHECK: cmovs
+  %add = add nsw i32 %b, %a
+  %cmp = icmp sgt i32 %add, 0
+  %add. = select i1 %cmp, i32 %add, i32 0
+  ret i32 %add.
+}
+; PR13475
+; If we have sub a, b and cmp b, a and the result of cmp is used
+; by sbb, we should not optimize cmp away.
+define i32 @q(i32 %j.4, i32 %w, i32 %el) {
+; CHECK: q:
+; CHECK: sub
+; CHECK: cmp
+; CHECK-NEXT: sbb
+  %tmp532 = add i32 %j.4, %w
+  %tmp533 = icmp ugt i32 %tmp532, %el
+  %tmp534 = icmp ult i32 %w, %el
+  %or.cond = and i1 %tmp533, %tmp534
+  %tmp535 = sub i32 %el, %w
+  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
+  ret i32 %j.5
+}
+; rdar://11873276
+define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK: r:
+; CHECK: sub
+; CHECK-NOT: cmp
+; CHECK: j
+; CHECK-NOT: sub
+; CHECK: ret
+  %0 = load i32* %offset, align 8
+  %cmp = icmp slt i32 %0, %size
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  %sub = sub nsw i32 %0, %size
+  store i32 %sub, i32* %offset, align 8
+  %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+  br label %return
+
+return:
+  %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+  ret i8* %retval.0
+}
diff --git a/test/CodeGen/X86/label-redefinition.ll b/test/CodeGen/X86/label-redefinition.ll
index 9ad33e0..9e88a18 100644
--- a/test/CodeGen/X86/label-redefinition.ll
+++ b/test/CodeGen/X86/label-redefinition.ll
@@ -1,5 +1,5 @@
 ; PR7054
-; RUN: not llc %s -o - |& grep {'_foo' label emitted multiple times to assembly}
+; RUN: not llc %s -o - 2>&1 | grep "'_foo' label emitted multiple times to assembly"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/large-global.ll b/test/CodeGen/X86/large-global.ll
new file mode 100644
index 0000000..7cb974b
--- /dev/null
+++ b/test/CodeGen/X86/large-global.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; rdar://11729134
+
+; EmitZerofill was incorrectly expecting a 32-bit "size" so 26214400000
+; was printed as 444596224
+
+%struct.X = type { [25000 x i8] }
+
+@gArray = global [1048576 x %struct.X] zeroinitializer, align 16
+
+; CHECK: .zerofill __DATA,__common,_gArray,26214400000,4
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 6930350..43f69b0 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {lea	EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
+; RUN:   grep "lea	EAX, DWORD PTR \[... + 4\*... - 5\]"
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   not grep add
 
diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll
index b469d083..721f545 100644
--- a/test/CodeGen/X86/liveness-local-regalloc.ll
+++ b/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs -mtriple=x86_64-apple-darwin10
 ; <rdar://problem/7755473>
+; PR12821
 
 %0 = type { i32, i8*, i8*, %1*, i8*, i64, i64, i32, i32, i32, i32, [1024 x i8] }
 %1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 }
@@ -58,3 +59,34 @@ infloop:                                          ; preds = %infloop, %bb3
 infloop1:                                         ; preds = %infloop1, %bb5
   br label %infloop1
 }
+
+
+; RAFast would forget to add a super-register <imp-def> when rewriting:
+;  %vreg10:sub_32bit<def,read-undef> = COPY %R9D<kill>
+; This trips up the machine code verifier.
+define void @autogen_SD24657(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca <16 x i16>
+  %A3 = alloca double
+  %A2 = alloca <2 x i8>
+  %A1 = alloca i1
+  %A = alloca i32
+  %L = load i8* %0
+  store i8 -37, i8* %0
+  %E = extractelement <4 x i64> zeroinitializer, i32 2
+  %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+  %I = insertelement <2 x i8> <i8 -1, i8 -1>, i8 %5, i32 1
+  %B = fadd float 0x45CDF5B1C0000000, 0x45CDF5B1C0000000
+  %FC = uitofp i32 275048 to double
+  %Sl = select i1 true, <2 x i8> %I, <2 x i8> <i8 -1, i8 -1>
+  %Cmp = icmp slt i64 0, %E
+  br label %CF
+
+CF:                                               ; preds = %BB
+  store i8 %5, i8* %0
+  store <2 x i8> %I, <2 x i8>* %A2
+  store i8 %5, i8* %0
+  store i8 %5, i8* %0
+  store i8 %5, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index d14102f..4bd162b 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -41,7 +41,6 @@ done:
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB1_4:
 ; CHECK-NEXT:   callq bar99
-; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB1_1:
 ; CHECK-NEXT:   callq body
 
@@ -79,7 +78,6 @@ exit:
 ; CHECK-NEXT: .LBB2_5:
 ; CHECK-NEXT:   callq block_a_true_func
 ; CHECK-NEXT:   callq block_a_merge_func
-; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB2_1:
 ; CHECK-NEXT:   callq body
 ;
@@ -139,13 +137,13 @@ exit:
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB3_7:
 ; CHECK-NEXT:   callq   bar100
-; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB3_1:
 ; CHECK-NEXT:   callq   loop_header
 ;      CHECK:   jl .LBB3_7
 ;      CHECK:   jge .LBB3_3
 ; CHECK-NEXT:   callq   bar101
 ; CHECK-NEXT:   jmp     .LBB3_1
+; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB3_3:
 ;      CHECK:   jge .LBB3_4
 ; CHECK-NEXT:   callq   bar102
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index ebda9f2..8a81f70 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,10 +1,16 @@
-; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin -mcpu=generic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
 ; CHECK: t:
 ; CHECK: decq
-; CHECK-NEXT: movl (
+; CHECK-NEXT: movl (%r9,%rax,4), %eax
 ; CHECK-NEXT: jne
 
+; ATOM: t:
+; ATOM: movl (%r9,%rax,4), %eax
+; ATOM-NEXT: decq
+; ATOM-NEXT: jne
+
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
 @Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
 @Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
@@ -149,6 +155,13 @@ bb2:		; preds = %bb
 ; CHECK: jne
 ; CHECK: ret
 
+; ATOM: f:
+; ATOM: %for.body
+; ATOM: incl [[IV:%e..]]
+; ATOM: cmpl $1, [[IV]]
+; ATOM: jne
+; ATOM: ret
+
 define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
 entry:
   %cmp4 = icmp eq i32 %i, 1
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 1f87089..276dab7 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck %s
 
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index c9ed3e5..6566f56 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s
 
 ; CHECK: xorl  %eax, %eax
 ; CHECK: movsd .LCPI0_0(%rip), %xmm0
@@ -9,6 +10,15 @@
 ; CHECK-NEXT: movsd
 ; CHECK-NEXT: incq %rax
 
+; ATOM: movsd .LCPI0_0(%rip), %xmm0
+; ATOM: xorl  %eax, %eax
+; ATOM: align
+; ATOM-NEXT: BB0_2:
+; ATOM-NEXT: movsd A(,%rax,8)
+; ATOM-NEXT: mulsd
+; ATOM-NEXT: movsd
+; ATOM-NEXT: incq %rax
+
 @A = external global [0 x double]
 
 define void @foo(i64 %n) nounwind {
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index a757cde..d171fd5 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -99,3 +99,60 @@ return:                                           ; preds = %if.end, %entry
   %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
   ret i32 %retval.0
 }
+
+; rdar://11393714
+define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp {
+; CHECK: %entry
+; CHECK: xorl
+; CHECK: %preheader
+; CHECK: %do.body
+; CHECK-NOT: xorl
+; CHECK: %do.cond
+; CHECK-NOT: xorl
+; CHECK: %return
+entry:
+  %cmp = icmp eq i64 %n, 0
+  br i1 %cmp, label %return, label %preheader
+
+preheader:
+  %conv2 = and i32 %c, 255
+  br label %do.body
+
+do.body:
+  %n.addr.0 = phi i64 [ %dec, %do.cond ], [ %n, %preheader ]
+  %p.0 = phi i8* [ %incdec.ptr, %do.cond ], [ %s, %preheader ]
+  %cmp3 = icmp eq i32 %a, %conv2
+  br i1 %cmp3, label %return, label %do.cond
+
+do.cond:
+  %incdec.ptr = getelementptr inbounds i8* %p.0, i64 1
+  %dec = add i64 %n.addr.0, -1
+  %cmp6 = icmp eq i64 %dec, 0
+  br i1 %cmp6, label %return, label %do.body
+
+return:
+  %retval.0 = phi i8* [ null, %entry ], [ null, %do.cond ], [ %p.0, %do.body ]
+  ret i8* %retval.0
+}
+
+; PR13578
+@t2_global = external global i32
+
+declare i1 @t2_func()
+
+define i32 @t2() {
+  store i32 42, i32* @t2_global
+  %c = call i1 @t2_func()
+  br i1 %c, label %a, label %b
+
+a:
+  %l = load i32* @t2_global
+  ret i32 %l
+
+b:
+  ret i32 0
+
+; CHECK: t2:
+; CHECK: t2_global@GOTPCREL(%rip)
+; CHECK-NOT: t2_global@GOTPCREL(%rip)
+}
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
index 80103d1..0015df0 100644
--- a/test/CodeGen/X86/mem-promote-integers.ll
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -1,8 +1,8 @@
 ; Test the basic functionality of integer element promotions of different types.
 ; This tests checks passing of arguments, loading and storing to memory and
 ; basic arithmetic.
-; RUN: llc -march=x86 -promote-elements < %s
-; RUN: llc -march=x86-64 -promote-elements < %s
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86-64 < %s
 
 define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
   %bb = load <1 x i8>* %b
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index f4bc1bb..723d1d8 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -disable-simplify-libcalls -mtriple=x86_64-linux | FileCheck %s --check-prefix=NOBUILTIN
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; This tests codegen time inlining/optimization of memcmp
@@ -23,6 +24,8 @@ return:                                           ; preds = %entry
 ; CHECK: memcmp2:
 ; CHECK: movw    ([[A0:%rdi|%rcx]]), %ax
 ; CHECK: cmpw    ([[A1:%rsi|%rdx]]), %ax
+; NOBUILTIN: memcmp2:
+; NOBUILTIN: callq
 }
 
 define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 689f7bf..206cb33 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -3,7 +3,7 @@
 
 define void @bork(<1 x i64>* %x) {
 ; CHECK: bork
-; CHECK: pextrd
+; CHECK: movlpd
 entry:
 	%tmp2 = load <1 x i64>* %x		; <<1 x i64>> [#uses=1]
 	%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>		; <<2 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index aeb540f..65ee7b1 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -55,4 +55,20 @@ entry:
 ; X64:	ret
 }
 
+; The two loads here both look identical to selection DAG, except for their
+; address spaces.  Make sure they aren't CSE'd.
+define i32 @test_no_cse() nounwind readonly {
+entry:
+	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
+	%tmp3 = load i32* %tmp2		; <i32> [#uses=1]
+	%tmp4 = add i32 %tmp1, %tmp3
+	ret i32 %tmp4
+}
+; X32: test_no_cse:
+; X32: 	movl	%gs:196
+; X32: 	movl	%fs:196
+; X32: 	ret
+
 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 4f7e28a..9f7d036 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,9 +1,9 @@
-; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
 ; rdar://7236213
-
-; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
-; The code isn't any worse though.
-; XFAIL: *
+;
+; The scheduler's 2-address hack has been disabled, so there is
+; currently no good guarantee that this test will pass until the
+; machine scheduler develops an equivalent heuristic.
 
 ; CodeGen shouldn't require any lea instructions inside the marked loop.
 ; It should properly set up post-increment uses and do coalescing for
diff --git a/test/CodeGen/X86/neg_cmp.ll b/test/CodeGen/X86/neg_cmp.ll
new file mode 100644
index 0000000..866514e
--- /dev/null
+++ b/test/CodeGen/X86/neg_cmp.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://11245199
+; PR12545
+define void @f(i32 %x, i32 %y) nounwind uwtable ssp {
+entry:
+; CHECK: f:
+; CHECK-NOT: neg
+; CHECK: add
+  %sub = sub i32 0, %y
+  %cmp = icmp eq i32 %x, %sub
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @g() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @g()
diff --git a/test/CodeGen/X86/opt-shuff-tstore.ll b/test/CodeGen/X86/opt-shuff-tstore.ll
index fc24913..3e72084 100644
--- a/test/CodeGen/X86/opt-shuff-tstore.ll
+++ b/test/CodeGen/X86/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -promote-elements -mattr=+sse2,+sse41 | FileCheck %s
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s  -mattr=+sse2,+sse41 | FileCheck %s
 
 ; CHECK: func_4_8
 ; A single memory write
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index d185af1..e987495 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -7,7 +7,7 @@
 ; Check that the shift gets turned into an LEA.
 
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   not grep {mov E.X, E.X}
+; RUN:   not grep "mov E.X, E.X"
 
 @G = external global i32                ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/pass-three.ll b/test/CodeGen/X86/pass-three.ll
new file mode 100644
index 0000000..23005c7
--- /dev/null
+++ b/test/CodeGen/X86/pass-three.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.3.0"
+
+
+define { i8*, i64, i64* } @copy_3(i8* %a, i64 %b, i64* %c) nounwind {
+entry:
+  %0 = insertvalue { i8*, i64, i64* } undef, i8* %a, 0
+  %1 = insertvalue { i8*, i64, i64* } %0, i64 %b, 1
+  %2 = insertvalue { i8*, i64, i64* } %1, i64* %c, 2
+  ret { i8*, i64, i64* } %2
+}
+
+; CHECK: copy_3:
+; CHECK-NOT: (%rdi)
+; CHECK: ret
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index d48a331..f958b6b 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {xorps	%xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep "xorps	%xmm0, %xmm0" | count 2
 
 define float @foo(<4 x float> %a) {
   %b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index ef02af2..476bb10 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
+; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
 ; PR1296
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/phielim-split.ll b/test/CodeGen/X86/phielim-split.ll
new file mode 100644
index 0000000..aa47735
--- /dev/null
+++ b/test/CodeGen/X86/phielim-split.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target triple = "x86_64-apple-macosx10.8.0"
+
+; The critical edge from for.cond to if.end2 should be split to avoid injecting
+; copies into the loop. The use of %b after the loop causes interference that
+; makes a copy necessary.
+; <rdar://problem/11561842>
+;
+; CHECK: split_loop_exit
+; CHECK: %for.cond
+; CHECK-NOT: mov
+; CHECK: je
+
+define i32 @split_loop_exit(i32 %a, i32 %b, i8* nocapture %p) nounwind uwtable readonly ssp {
+entry:
+  %cmp = icmp sgt i32 %a, 10
+  br i1 %cmp, label %for.cond, label %if.end2
+
+for.cond:                                         ; preds = %entry, %for.cond
+  %p.addr.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %p, %entry ]
+  %incdec.ptr = getelementptr inbounds i8* %p.addr.0, i64 1
+  %0 = load i8* %p.addr.0, align 1
+  %tobool = icmp eq i8 %0, 0
+  br i1 %tobool, label %for.cond, label %if.end2
+
+if.end2:                                          ; preds = %for.cond, %entry
+  %r.0 = phi i32 [ %a, %entry ], [ %b, %for.cond ]
+  %add = add nsw i32 %r.0, %b
+  ret i32 %add
+}
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index 8b9ea17..37eca1c 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; CHECKed instructions should be the same with or without -O0.
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
+; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
 
 @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
 
@@ -15,6 +16,19 @@ entry:
 ; CHECK: movl	%ebx, 40(%esp)
 ; CHECK-NOT: movl
 ; CHECK: addl %ebx, %eax
+
+; On Intel Atom the scheduler moves a movl instruction
+; used for the printf call to follow movl 24(%esp), %eax
+; ATOM: movl 24(%esp), %eax
+; ATOM: movl
+; ATOM: movl   %eax, 36(%esp)
+; ATOM-NOT: movl
+; ATOM: movl 28(%esp), %ebx
+; ATOM-NOT: movl
+; ATOM: movl   %ebx, 40(%esp)
+; ATOM-NOT: movl
+; ATOM: addl %ebx, %eax
+
   %retval = alloca i32                            ; <i32*> [#uses=2]
   %"%ebx" = alloca i32                            ; <i32*> [#uses=1]
   %"%eax" = alloca i32                            ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 4162015..984d7e5 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,10 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -join-physregs | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; XFAIL: *
 ; rdar://5571034
 
 ; This requires physreg joining, %vreg13 is live everywhere:
 ; 304L		%CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13
 ; 320L		%vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19
 ; 336L		%vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
+;
+; This test is XFAIL until the register allocator understands trivial physreg
+; interference. <rdar://9802098>
 
 define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index d8ed4c0..da4af81 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
 ; RUN: grep mov %t | count 11
 
-; The f() arguments in %xmm0 and %xmm1 cause an extra movdqa without -join-physregs.
-
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
         ret <4 x i32> %A
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
index cc1df2f..800fbed 100644
--- a/test/CodeGen/X86/pointer-vector.ll
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -105,8 +105,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
 entry:
   %G = load <2 x i8*>* %p
 ;CHECK: movl
-;CHECK: movd
-;CHECK: pinsrd
+;CHECK: movsd
   %T = bitcast <2 x i8*> %G to <2 x i32*>
 ;CHECK: ret
   ret <2 x i32*> %T
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
index e1fa032..6c32a22 100644
--- a/test/CodeGen/X86/pr11415.ll
+++ b/test/CodeGen/X86/pr11415.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast -optimize-regalloc=0 | FileCheck %s
 
 ; We used to consider the early clobber in the second asm statement as
 ; defining %0 before it was read. This caused us to omit the
diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll
new file mode 100644
index 0000000..f7e9adb
--- /dev/null
+++ b/test/CodeGen/X86/pr11468.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; PR11468
+
+define void @f(i64 %sz) uwtable {
+entry:
+  %a = alloca i32, align 32
+  store volatile i32 0, i32* %a, align 32
+  ; force to push r14 on stack
+  call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+  ret void
+
+; CHECK: _f
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: movq %rsp, %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+
+; We first push register on stack, and then realign it, so that
+; .cfi_offset value is correct
+; CHECK: pushq %r14
+; CHECK: andq $-32, %rsp
+; CHECK: .cfi_offset %r14, -24
+
+; Restore %rsp from %rbp and subtract the total size of saved regsiters.
+; CHECK: leaq -8(%rbp), %rsp
+
+; Pop saved registers.
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+!0 = metadata !{i32 125}
+
diff --git a/test/CodeGen/X86/pr12889.ll b/test/CodeGen/X86/pr12889.ll
new file mode 100644
index 0000000..331d8f9
--- /dev/null
+++ b/test/CodeGen/X86/pr12889.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c0 = common global i8 0, align 1
+
+define void @func() nounwind uwtable {
+entry:
+  %0 = load i8* @c0, align 1, !tbaa !0
+  %tobool = icmp ne i8 %0, 0
+  %conv = zext i1 %tobool to i8
+  %storemerge = shl nuw nsw i8 %conv, %conv
+  store i8 %storemerge, i8* @c0, align 1
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/pr13209.ll b/test/CodeGen/X86/pr13209.ll
new file mode 100644
index 0000000..1c93163
--- /dev/null
+++ b/test/CodeGen/X86/pr13209.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; CHECK: pr13209:
+; CHECK-NOT: mov
+; CHECK: .size pr13209
+
+define zeroext i1 @pr13209(i8** %x, i8*** %jumpTable) nounwind {
+if.end51:
+  br label %indirectgoto.preheader
+indirectgoto.preheader:
+  %frombool.i5915.ph = phi i8 [ undef, %if.end51 ], [ %frombool.i5917, %jit_return ]
+  br label %indirectgoto
+do.end165:
+  %tmp92 = load i8** %x, align 8
+  br label %indirectgoto
+do.end209:
+  %tmp104 = load i8** %x, align 8
+  br label %indirectgoto
+do.end220:
+  %tmp107 = load i8** %x, align 8
+  br label %indirectgoto
+do.end231:
+  %tmp110 = load i8** %x, align 8
+  br label %indirectgoto
+do.end242:
+  %tmp113 = load i8** %x, align 8
+  br label %indirectgoto
+do.end253:
+  %tmp116 = load i8** %x, align 8
+  br label %indirectgoto
+do.end286:
+  %tmp125 = load i8** %x, align 8
+  br label %indirectgoto
+do.end297:
+  %tmp128 = load i8** %x, align 8
+  br label %indirectgoto
+do.end308:
+  %tmp131 = load i8** %x, align 8
+  br label %indirectgoto
+do.end429:
+  %tmp164 = load i8** %x, align 8
+  br label %indirectgoto
+do.end440:
+  %tmp167 = load i8** %x, align 8
+  br label %indirectgoto
+do.body482:
+  br i1 false, label %indirectgoto, label %do.body495
+do.body495:
+  br label %indirectgoto
+do.end723:
+  br label %inline_return
+inline_return:
+  %frombool.i5917 = phi i8 [ 0, %if.end5571 ], [ %frombool.i5915, %do.end723 ]
+  br label %jit_return
+jit_return:
+  br label %indirectgoto.preheader
+L_JSOP_UINT24:
+  %tmp864 = load i8** %x, align 8
+  br label %indirectgoto
+L_JSOP_THROWING:
+  %tmp1201 = load i8** %x, align 8
+  br label %indirectgoto
+do.body4936:
+  %tmp1240 = load i8** %x, align 8
+  br label %indirectgoto
+do.body5184:
+  %tmp1340 = load i8** %x, align 8
+  br label %indirectgoto
+if.end5571:
+  br  label %inline_return
+indirectgoto:
+  %frombool.i5915 = phi i8  [ 0, %do.body495 ],[ 0, %do.body482 ] , [ %frombool.i5915, %do.body4936 ],[ %frombool.i5915, %do.body5184 ], [ %frombool.i5915, %L_JSOP_UINT24 ], [ %frombool.i5915, %do.end286 ], [ %frombool.i5915, %do.end297 ], [ %frombool.i5915, %do.end308 ], [ %frombool.i5915, %do.end429 ], [ %frombool.i5915, %do.end440 ], [ %frombool.i5915, %L_JSOP_THROWING ], [ %frombool.i5915, %do.end253 ], [ %frombool.i5915, %do.end242 ], [ %frombool.i5915, %do.end231 ], [ %frombool.i5915, %do.end220 ], [ %frombool.i5915, %do.end209 ],[ %frombool.i5915, %do.end165 ], [ %frombool.i5915.ph, %indirectgoto.preheader ]
+  indirectbr i8* null, [ label %if.end5571, label %do.end165, label %do.end209, label %do.end220, label %do.end231, label %do.end242, label %do.end253, label %do.end723, label %L_JSOP_THROWING, label %do.end440, label %do.end429, label %do.end308, label %do.end297, label %do.end286, label %L_JSOP_UINT24, label %do.body5184, label %do.body4936, label %do.body482]
+}
diff --git a/test/CodeGen/X86/pr13220.ll b/test/CodeGen/X86/pr13220.ll
new file mode 100644
index 0000000..b9ac4b6
--- /dev/null
+++ b/test/CodeGen/X86/pr13220.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 < %s
+; PR13220
+
+define <8 x i32> @foo(<8 x i96> %x) {
+  %a = lshr <8 x i96> %x, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1>
+  %b = trunc <8 x i96> %a to <8 x i32>
+  ret <8 x i32> %b
+}
+
+define <8 x i32> @bar(<8 x i97> %x) {
+  %a = lshr <8 x i97> %x, <i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1>
+  %b = trunc <8 x i97> %a to <8 x i32>
+  ret <8 x i32> %b
+}
+
+define <8 x i32> @bax() {
+  %a = lshr <8 x i96> <i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4>, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1>
+  %b = trunc <8 x i96> %a to <8 x i32>
+  ret <8 x i32> %b
+}
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
new file mode 100644
index 0000000..faaec26
--- /dev/null
+++ b/test/CodeGen/X86/pr13577.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64
+
+define x86_fp80 @foo(x86_fp80 %a) {
+  %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone
+  ret x86_fp80 %1
+}
+
+declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index afd7114..f0e31f7 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep "xorps.*sp" | count 1
 ; PR2656
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 1122530..d8f3778 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk"
 ; PR3522
 
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/promote-trunc.ll b/test/CodeGen/X86/promote-trunc.ll
index 4211d82..40a58b0 100644
--- a/test/CodeGen/X86/promote-trunc.ll
+++ b/test/CodeGen/X86/promote-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -promote-elements < %s -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 define<4 x i8> @func_8_64() {
   %F = load <4 x i64>* undef
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index faca3d7..8ef9b5d 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -177,3 +177,49 @@ if.end4:
 return:
   ret void
 }
+
+; Deal with TokenFactor chain
+; rdar://11236106
+@foo = external global i64*, align 8
+
+define void @test3() nounwind ssp {
+entry:
+; CHECK: test3:
+; CHECK: decq 16(%rax)
+  %0 = load i64** @foo, align 8
+  %arrayidx = getelementptr inbounds i64* %0, i64 2
+  %1 = load i64* %arrayidx, align 8
+  %dec = add i64 %1, -1
+  store i64 %dec, i64* %arrayidx, align 8
+  %cmp = icmp eq i64 %dec, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @baz() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+declare void @baz()
+
+; Avoid creating a cycle in the DAG which would trigger an assert in the
+; scheduler.
+; PR12565
+; rdar://11451474
+@x = external global i32, align 4
+@y = external global i32, align 4
+@z = external global i32, align 4
+
+define void @test4() nounwind uwtable ssp {
+entry:
+  %0 = load i32* @x, align 4
+  %1 = load i32* @y, align 4
+  %dec = add nsw i32 %1, -1
+  store i32 %dec, i32* @y, align 4
+  %tobool.i = icmp ne i32 %dec, 0
+  %cond.i = select i1 %tobool.i, i32 %0, i32 0
+  store i32 %cond.i, i32* @z, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
new file mode 100644
index 0000000..e2224a6
--- /dev/null
+++ b/test/CodeGen/X86/rdrand.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdrand | FileCheck %s
+declare {i16, i32} @llvm.x86.rdrand.16()
+declare {i32, i32} @llvm.x86.rdrand.32()
+declare {i64, i32} @llvm.x86.rdrand.64()
+
+define i32 @_rdrand16_step(i16* %random_val) {
+  %call = call {i16, i32} @llvm.x86.rdrand.16()
+  %randval = extractvalue {i16, i32} %call, 0
+  store i16 %randval, i16* %random_val
+  %isvalid = extractvalue {i16, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdrand16_step:
+; CHECK: rdrandw	%ax
+; CHECK: movw	%ax, (%r[[A0:di|cx]])
+; CHECK: movzwl	%ax, %ecx
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%ecx, %eax
+; CHECK: ret
+}
+
+define i32 @_rdrand32_step(i32* %random_val) {
+  %call = call {i32, i32} @llvm.x86.rdrand.32()
+  %randval = extractvalue {i32, i32} %call, 0
+  store i32 %randval, i32* %random_val
+  %isvalid = extractvalue {i32, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdrand32_step:
+; CHECK: rdrandl	%e[[T0:[a-z]+]]
+; CHECK: movl	%e[[T0]], (%r[[A0]])
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%e[[T0]], %eax
+; CHECK: ret
+}
+
+define i32 @_rdrand64_step(i64* %random_val) {
+  %call = call {i64, i32} @llvm.x86.rdrand.64()
+  %randval = extractvalue {i64, i32} %call, 0
+  store i64 %randval, i64* %random_val
+  %isvalid = extractvalue {i64, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdrand64_step:
+; CHECK: rdrandq	%r[[T1:[[a-z]+]]
+; CHECK: movq	%r[[T1]], (%r[[A0]])
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%e[[T1]], %eax
+; CHECK: ret
+}
+
+; Check that MachineCSE doesn't eliminate duplicate rdrand instructions.
+define i32 @CSE() nounwind {
+ %rand1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
+ %v1 = extractvalue { i32, i32 } %rand1, 0
+ %rand2 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
+ %v2 = extractvalue { i32, i32 } %rand2, 0
+ %add = add i32 %v2, %v1
+ ret i32 %add
+; CHECK: CSE:
+; CHECK: rdrandl
+; CHECK: rdrandl
+}
+
+; Check that MachineLICM doesn't hoist rdrand instructions.
+define void @loop(i32* %p, i32 %n) nounwind {
+entry:
+  %tobool1 = icmp eq i32 %n, 0
+  br i1 %tobool1, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %p.addr.03 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+  %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %dec = add nsw i32 %n.addr.02, -1
+  %incdec.ptr = getelementptr inbounds i32* %p.addr.03, i64 1
+  %rand = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
+  %v1 = extractvalue { i32, i32 } %rand, 0
+  store i32 %v1, i32* %p.addr.03, align 4
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+; CHECK: loop:
+; CHECK-NOT: rdrandl
+; CHECK: This Inner Loop Header: Depth=1
+; CHECK: rdrandl
+}
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index e0b5f7a..52d7b56 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,8 +1,8 @@
 ;; Both functions in this testcase should codegen to the same function, and
 ;; neither of them should require spilling anything to the stack.
 
-; RUN: llc < %s -march=x86 -stats |& \
-; RUN:   not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
+; RUN:   not grep "Number of register spills"
 
 ;; This can be compiled to use three registers if the loads are not
 ;; folded into the multiplies, 2 registers otherwise.
diff --git a/test/CodeGen/X86/remat-fold-load.ll b/test/CodeGen/X86/remat-fold-load.ll
new file mode 100644
index 0000000..de77ad3
--- /dev/null
+++ b/test/CodeGen/X86/remat-fold-load.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -disable-fp-elim -verify-coalescing
+; PR13414
+;
+; During coalescing, remat triggers DCE which deletes the penultimate use of a
+; load. This load should not be folded into the remaining use because it is not
+; safe to move, and it would extend the live range of the address.
+;
+; LiveRangeEdit::foldAsLoad() doesn't extend live ranges, so -verify-coalescing
+; catches the problem.
+
+target triple = "i386-unknown-linux-gnu"
+
+%type_a = type { %type_a*, %type_b }
+%type_b = type { %type_c, i32 }
+%type_c = type { i32, %type_d }
+%type_d = type { i64 }
+%type_e = type { %type_c, i64 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define linkonce_odr void @test() nounwind {
+entry:
+  br i1 undef, label %while.end.while.end26_crit_edge, label %while.body12.lr.ph
+
+while.end.while.end26_crit_edge:                  ; preds = %entry
+  br label %while.end26
+
+while.body12.lr.ph:                               ; preds = %entry
+  br label %while.body12
+
+while.body12:                                     ; preds = %if.end24, %while.body12.lr.ph
+  %tmp = phi %type_a* [ undef, %while.body12.lr.ph ], [ %tmp18, %if.end24 ]
+  %ins151154161 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp, %if.end24 ]
+  %ins135156160 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp158, %if.end24 ]
+  %ins151 = or i128 0, %ins151154161
+  %cmp.i.i.i.i.i67 = icmp sgt i32 undef, 8
+  br i1 %cmp.i.i.i.i.i67, label %if.then.i.i.i.i71, label %if.else.i.i.i.i74
+
+if.then.i.i.i.i71:                                ; preds = %while.body12
+  %call4.i.i.i.i68 = call noalias i8* @malloc(i32 undef) nounwind
+  %tmp1 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1
+  %buf_6.i.i.i.i70 = bitcast %type_d* %tmp1 to i8**
+  %tmp2 = load i8** %buf_6.i.i.i.i70, align 4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %tmp2, i32 undef, i32 1, i1 false) nounwind
+  unreachable
+
+if.else.i.i.i.i74:                                ; preds = %while.body12
+  %i_.i.i.i.i72 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0
+  %tmp3 = load i64* %i_.i.i.i.i72, align 4
+  %tmp4 = zext i64 %tmp3 to i128
+  %tmp5 = shl nuw nsw i128 %tmp4, 32
+  %ins148 = or i128 %tmp5, %ins151
+  %second3.i.i76 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 1
+  %tmp6 = load i32* %second3.i.i76, align 4
+  %tmp7 = zext i32 %tmp6 to i128
+  %tmp8 = shl nuw i128 %tmp7, 96
+  %mask144 = and i128 %ins148, 79228162495817593519834398720
+  %tmp9 = load %type_e** undef, align 4
+  %len_.i.i.i.i86 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 0
+  %tmp10 = load i32* %len_.i.i.i.i86, align 4
+  %tmp11 = zext i32 %tmp10 to i128
+  %ins135 = or i128 %tmp11, %ins135156160
+  %cmp.i.i.i.i.i88 = icmp sgt i32 %tmp10, 8
+  br i1 %cmp.i.i.i.i.i88, label %if.then.i.i.i.i92, label %if.else.i.i.i.i95
+
+if.then.i.i.i.i92:                                ; preds = %if.else.i.i.i.i74
+  %call4.i.i.i.i89 = call noalias i8* @malloc(i32 %tmp10) nounwind
+  %ins126 = or i128 0, %ins135
+  %tmp12 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1
+  %buf_6.i.i.i.i91 = bitcast %type_d* %tmp12 to i8**
+  %tmp13 = load i8** %buf_6.i.i.i.i91, align 4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call4.i.i.i.i89, i8* %tmp13, i32 %tmp10, i32 1, i1 false) nounwind
+  br label %A
+
+if.else.i.i.i.i95:                                ; preds = %if.else.i.i.i.i74
+  %i_.i.i.i.i93 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1, i32 0
+  br label %A
+
+A:                                                ; preds = %if.else.i.i.i.i95, %if.then.i.i.i.i92
+  %ins135157 = phi i128 [ %ins126, %if.then.i.i.i.i92 ], [ undef, %if.else.i.i.i.i95 ]
+  %second3.i.i97 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 1
+  %tmp14 = load i64* %second3.i.i97, align 4
+  %tmp15 = trunc i64 %tmp14 to i32
+  %cmp.i99 = icmp sgt i32 %tmp6, %tmp15
+  %tmp16 = trunc i128 %ins135157 to i32
+  %cmp.i.i.i.i.i.i101 = icmp sgt i32 %tmp16, 8
+  br i1 %cmp.i.i.i.i.i.i101, label %if.then.i.i.i.i.i103, label %B
+
+if.then.i.i.i.i.i103:                             ; preds = %A
+  unreachable
+
+B:                                                ; preds = %A
+  %tmp17 = trunc i128 %ins148 to i32
+  %cmp.i.i.i.i.i.i83 = icmp sgt i32 %tmp17, 8
+  br i1 %cmp.i.i.i.i.i.i83, label %if.then.i.i.i.i.i85, label %C
+
+if.then.i.i.i.i.i85:                              ; preds = %B
+  unreachable
+
+C:                                                ; preds = %B
+  br i1 %cmp.i99, label %if.then17, label %if.end24
+
+if.then17:                                        ; preds = %C
+  br i1 false, label %if.then.i.i.i.i.i43, label %D
+
+if.then.i.i.i.i.i43:                              ; preds = %if.then17
+  unreachable
+
+D:                                                ; preds = %if.then17
+  br i1 undef, label %if.then.i.i.i.i.i, label %E
+
+if.then.i.i.i.i.i:                                ; preds = %D
+  unreachable
+
+E:                                                ; preds = %D
+  br label %if.end24
+
+if.end24:                                         ; preds = %E, %C
+  %phitmp = or i128 %tmp8, %mask144
+  %phitmp158 = or i128 undef, undef
+  %tmp18 = load %type_a** undef, align 4
+  %tmp19 = load %type_a** undef, align 4
+  %cmp.i49 = icmp eq %type_a* %tmp18, %tmp19
+  br i1 %cmp.i49, label %while.cond10.while.end26_crit_edge, label %while.body12
+
+while.cond10.while.end26_crit_edge:               ; preds = %if.end24
+  %.pre = load %type_e** undef, align 4
+  br label %while.end26
+
+while.end26:                                      ; preds = %while.cond10.while.end26_crit_edge, %while.end.while.end26_crit_edge
+  br i1 undef, label %while.body.lr.ph.i, label %F
+
+while.body.lr.ph.i:                               ; preds = %while.end26
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %while.body.i, %while.body.lr.ph.i
+  br i1 false, label %while.body.i, label %F
+
+F:                                                ; preds = %while.body.i, %while.end26
+  ret void
+}
+
+declare noalias i8* @malloc(i32) nounwind
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index 75f438d..f6095a7 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
 ; RUN: not grep xor %t
 ; RUN: not grep movap %t
-; RUN: grep {\\.quad.*0} %t
+; RUN: grep "\.quad.*0" %t
 
 ; Remat should be able to fold the zero constant into the div instructions
 ; as a constant-pool load.
diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll
new file mode 100644
index 0000000..9772125
--- /dev/null
+++ b/test/CodeGen/X86/reverse_branches.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+@.str2 = private unnamed_addr constant [7 x i8] c"memchr\00", align 1
+@.str3 = private unnamed_addr constant [11 x i8] c"bsd_memchr\00", align 1
+@str4 = private unnamed_addr constant [5 x i8] c"Bug!\00"
+
+; Make sure at end of do.cond.i, we jump to do.body.i first to have a tighter
+; inner loop.
+define i32 @test_branches_order() uwtable ssp {
+; CHECK: test_branches_order:
+; CHECK: [[L0:LBB0_[0-9]+]]: ## %do.body.i
+; CHECK: je
+; CHECK: %do.cond.i
+; CHECK: jne [[L0]]
+; CHECK: jmp
+; CHECK: %exit
+entry:
+  %strs = alloca [1000 x [1001 x i8]], align 16
+  br label %for.cond
+
+for.cond:
+  %j.0 = phi i32 [ 0, %entry ], [ %inc10, %for.inc9 ]
+  %cmp = icmp slt i32 %j.0, 1000
+  br i1 %cmp, label %for.cond1, label %for.end11
+
+for.cond1:
+  %indvars.iv50 = phi i64 [ %indvars.iv.next51, %for.body3 ], [ 0, %for.cond ]
+  %0 = trunc i64 %indvars.iv50 to i32
+  %cmp2 = icmp slt i32 %0, 1000
+  br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.body3:
+  %arraydecay = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0
+  %call = call i8* @memchr(i8* %arraydecay, i32 120, i64 1000)
+  %add.ptr = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50
+  %cmp7 = icmp eq i8* %call, %add.ptr
+  %indvars.iv.next51 = add i64 %indvars.iv50, 1
+  br i1 %cmp7, label %for.cond1, label %if.then
+
+if.then:
+  %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+  call void @exit(i32 1) noreturn
+  unreachable
+
+for.inc9:
+  %inc10 = add nsw i32 %j.0, 1
+  br label %for.cond
+
+for.end11:
+  %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0))
+  br label %for.cond14
+
+for.cond14:
+  %j13.0 = phi i32 [ 0, %for.end11 ], [ %inc39, %for.inc38 ]
+  %cmp15 = icmp slt i32 %j13.0, 1000
+  br i1 %cmp15, label %for.cond18, label %for.end40
+
+for.cond18:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %exit ], [ 0, %for.cond14 ]
+  %1 = trunc i64 %indvars.iv to i32
+  %cmp19 = icmp slt i32 %1, 1000
+  br i1 %cmp19, label %for.body20, label %for.inc38
+
+for.body20:
+  %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0
+  br label %do.body.i
+
+do.body.i:
+  %n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ]
+  %p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ]
+  %2 = load i8* %p.0.i, align 1
+  %cmp3.i = icmp eq i8 %2, 120
+  br i1 %cmp3.i, label %exit, label %do.cond.i
+
+do.cond.i:
+  %incdec.ptr.i = getelementptr inbounds i8* %p.0.i, i64 1
+  %dec.i = add i64 %n.addr.0.i, -1
+  %cmp5.i = icmp eq i64 %dec.i, 0
+  br i1 %cmp5.i, label %if.then32, label %do.body.i
+
+exit:
+  %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv
+  %cmp31 = icmp eq i8* %p.0.i, %add.ptr30
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  br i1 %cmp31, label %for.cond18, label %if.then32
+
+if.then32:
+  %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+  call void @exit(i32 1) noreturn
+  unreachable
+
+for.inc38:
+  %inc39 = add nsw i32 %j13.0, 1
+  br label %for.cond14
+
+for.end40:
+  %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8]* @.str3, i64 0, i64 0))
+  ret i32 0
+}
+
+declare i8* @memchr(i8*, i32, i64) nounwind readonly
+declare void @exit(i32) noreturn
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
index 1e20273..1173001 100644
--- a/test/CodeGen/X86/rotate.ll
+++ b/test/CodeGen/X86/rotate.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {ro\[rl\]} | count 12
+; RUN:   grep "ro[rl]" | count 12
 
 define i32 @rotl32(i32 %A, i8 %Amt) {
 	%shift.upgrd.1 = zext i8 %Amt to i32		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
index 0dd74ea..51fcf64 100644
--- a/test/CodeGen/X86/rounding-ops.ll
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
 
 define float @test1(float %x) nounwind  {
   %call = tail call float @floorf(float %x) nounwind readnone
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index 5ce08aa..d68b00b 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -51,14 +51,14 @@ false:
 ; X64-NEXT: callq __morestack
 ; X64-NEXT: ret
 
-; X64:      movq %rsp, %rdi
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: cmpq %rdi, %fs:112
+; X64:      movq %rsp, %[[RDI:rdi|rax]]
+; X64-NEXT: subq %{{.*}}, %[[RDI]]
+; X64-NEXT: cmpq %[[RDI]], %fs:112
 
-; X64:      movq %rdi, %rsp
+; X64:      movq %[[RDI]], %rsp
 
-; X64:      movq %rax, %rdi
+; X64:      movq %{{.*}}, %rdi
 ; X64-NEXT: callq __morestack_allocate_stack_space
-; X64-NEXT: movq %rax, %rdi
+; X64:      movq %rax, %rdi
 
 }
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index f465a4f..2e39473 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 ; PR5757
 
 %0 = type { i64, i32 }
@@ -12,6 +13,10 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
 ; CHECK: test1:
 ; CHECK: cmovneq %rdi, %rsi
 ; CHECK: movl (%rsi), %eax
+
+; ATOM: test1:
+; ATOM: cmovneq %rdi, %rsi
+; ATOM: movl (%rsi), %eax
 }
 
 
@@ -31,6 +36,10 @@ bb91:		; preds = %bb84
 ; CHECK: test2:
 ; CHECK: movnew
 ; CHECK: movswl
+
+; ATOM: test2:
+; ATOM: movnew
+; ATOM: movswl
 }
 
 declare i1 @return_false()
@@ -44,6 +53,9 @@ entry:
 	ret float %iftmp.0.0
 ; CHECK: test3:
 ; CHECK: movss	{{.*}},4), %xmm0
+
+; ATOM: test3:
+; ATOM: movss  {{.*}},4), %xmm0
 }
 
 define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
@@ -55,6 +67,9 @@ entry:
 	ret i8 %2
 ; CHECK: test4:
 ; CHECK: movsbl	({{.*}},4), %eax
+
+; ATOM: test4:
+; ATOM: movsbl ({{.*}},4), %eax
 }
 
 define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
@@ -62,6 +77,8 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
   store <2 x i16> %x, <2 x i16>* %p
   ret void
 ; CHECK: test5:
+
+; ATOM: test5:
 }
 
 define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
@@ -79,6 +96,12 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
 ; CHECK: ret
 ; CHECK: mulps
 ; CHECK: ret
+
+; ATOM: test6:
+; ATOM: je
+; ATOM: ret
+; ATOM: mulps
+; ATOM: ret
 }
 
 ; Select with fp80's
@@ -89,6 +112,10 @@ define x86_fp80 @test7(i32 %tmp8) nounwind {
 ; CHECK: test7:
 ; CHECK: leaq
 ; CHECK: fldt (%r{{.}}x,%r{{.}}x)
+
+; ATOM: test7:
+; ATOM: leaq
+; ATOM: fldt (%r{{.}}x,%r{{.}}x)
 }
 
 ; widening select v6i32 and then a sub
@@ -97,8 +124,10 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	store <6 x i32> %val, <6 x i32>* %dst.addr
 	ret void
-        
+
 ; CHECK: test8:
+
+; ATOM: test8:
 }
 
 
@@ -113,6 +142,12 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
+
+; ATOM: test9:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: orq    %rsi, %rax
+; ATOM: ret
 }
 
 ;; Same as test9
@@ -125,6 +160,12 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
+
+; ATOM: test9a:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: orq    %rsi, %rax
+; ATOM: ret
 }
 
 define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
@@ -137,6 +178,12 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
+
+; ATOM: test9b:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: orq    %rsi, %rax
+; ATOM: ret
 }
 
 ;; Select between -1 and 1.
@@ -149,6 +196,12 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	$1, %rax
 ; CHECK: ret
+
+; ATOM: test10:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: orq    $1, %rax
+; ATOM: ret
 }
 
 
@@ -163,6 +216,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: notq %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
+
+; ATOM: test11:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: notq %rax
+; ATOM: orq    %rsi, %rax
+; ATOM: ret
 }
 
 define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
@@ -175,6 +235,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK: notq %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
+
+; ATOM: test11a:
+; ATOM: cmpq   $1, %rdi
+; ATOM: sbbq   %rax, %rax
+; ATOM: notq %rax
+; ATOM: orq    %rsi, %rax
+; ATOM: ret
 }
 
 
@@ -189,10 +256,16 @@ entry:
   %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
   ret i8* %call
 ; CHECK: test12:
-; CHECK: mulq
 ; CHECK: movq $-1, %rdi
+; CHECK: mulq
 ; CHECK: cmovnoq	%rax, %rdi
 ; CHECK: jmp	__Znam
+
+; ATOM: test12:
+; ATOM: mulq
+; ATOM: movq $-1, %rdi
+; ATOM: cmovnoq        %rax, %rdi
+; ATOM: jmp    __Znam
 }
 
 declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
@@ -205,6 +278,11 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
 ; CHECK: cmpl
 ; CHECK-NEXT: sbbl
 ; CHECK-NEXT: ret
+
+; ATOM: test13:
+; ATOM: cmpl
+; ATOM-NEXT: sbbl
+; ATOM-NEXT: ret
 }
 
 define i32 @test14(i32 %a, i32 %b) nounwind {
@@ -216,5 +294,53 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT: sbbl
 ; CHECK-NEXT: notl
 ; CHECK-NEXT: ret
+
+; ATOM: test14:
+; ATOM: cmpl
+; ATOM-NEXT: sbbl
+; ATOM-NEXT: notl
+; ATOM-NEXT: ret
+}
+
+; rdar://10961709
+define i32 @test15(i32 %x) nounwind {
+entry:
+  %cmp = icmp ne i32 %x, 0
+  %sub = sext i1 %cmp to i32
+  ret i32 %sub
+; CHECK: test15:
+; CHECK: negl
+; CHECK: sbbl
+
+; ATOM: test15:
+; ATOM: negl
+; ATOM: sbbl
 }
 
+define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
+entry:
+  %cmp = icmp ne i64 %x, 0
+  %conv1 = sext i1 %cmp to i64
+  ret i64 %conv1
+; CHECK: test16:
+; CHECK: negq
+; CHECK: sbbq
+
+; ATOM: test16:
+; ATOM: negq
+; ATOM: sbbq
+}
+
+define i16 @test17(i16 %x) nounwind {
+entry:
+  %cmp = icmp ne i16 %x, 0
+  %sub = sext i1 %cmp to i16
+  ret i16 %sub
+; CHECK: test17:
+; CHECK: negw
+; CHECK: sbbw
+
+; ATOM: test17:
+; ATOM: negw
+; ATOM: sbbw
+}
diff --git a/test/CodeGen/X86/selectiondag-cse.ll b/test/CodeGen/X86/selectiondag-cse.ll
new file mode 100644
index 0000000..a653a1c
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-cse.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s
+; PR12599
+;
+; This bitcode causes the X86 target to make changes to the DAG during
+; selection in MatchAddressRecursively. The edit triggers CSE which causes both
+; the current node and yet-to-be-selected nodes to be deleted.
+;
+; SelectionDAGISel::DoInstructionSelection must handle that.
+;
+target triple = "x86_64-apple-macosx"
+
+%0 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %1*, %2*, %9*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %10*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%1 = type { i32, i32, [100 x %2*], i32, float, float, float }
+%2 = type { i32, i32, i32, i32, i32, i32, %3*, %6*, %8*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%3 = type { %4*, %5, %5 }
+%4 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%5 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%6 = type { [3 x [11 x %7]], [2 x [9 x %7]], [2 x [10 x %7]], [2 x [6 x %7]], [4 x %7], [4 x %7], [3 x %7] }
+%7 = type { i16, i8, i64 }
+%8 = type { [2 x %7], [4 x %7], [3 x [4 x %7]], [10 x [4 x %7]], [10 x [15 x %7]], [10 x [15 x %7]], [10 x [5 x %7]], [10 x [5 x %7]], [10 x [15 x %7]], [10 x [15 x %7]] }
+%9 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %9*, %9*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%10 = type { i32, i32, i32, i32, i32, %10* }
+
+@images = external hidden global %0, align 8
+
+define hidden fastcc void @Mode_Decision_for_4x4IntraBlocks() nounwind uwtable ssp {
+bb4:
+  %tmp = or i208 undef, 0
+  br i1 undef, label %bb35, label %bb5
+
+bb5:
+  %tmp6 = add i32 0, 2
+  %tmp7 = lshr i208 %tmp, 80
+  %tmp8 = trunc i208 %tmp7 to i32
+  %tmp9 = and i32 %tmp8, 65535
+  %tmp10 = shl nuw nsw i32 %tmp9, 1
+  %tmp11 = add i32 0, 2
+  %tmp12 = add i32 %tmp11, 0
+  %tmp13 = add i32 %tmp12, %tmp10
+  %tmp14 = lshr i32 %tmp13, 2
+  %tmp15 = trunc i32 %tmp14 to i16
+  store i16 %tmp15, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 0, i64 3), align 2
+  %tmp16 = lshr i208 %tmp, 96
+  %tmp17 = trunc i208 %tmp16 to i32
+  %tmp18 = and i32 %tmp17, 65535
+  %tmp19 = add i32 %tmp18, 2
+  %tmp20 = add i32 %tmp19, 0
+  %tmp21 = add i32 %tmp20, 0
+  %tmp22 = lshr i32 %tmp21, 2
+  %tmp23 = trunc i32 %tmp22 to i16
+  store i16 %tmp23, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 2, i64 3), align 2
+  %tmp24 = add i32 %tmp6, %tmp9
+  %tmp25 = add i32 %tmp24, 0
+  %tmp26 = lshr i32 %tmp25, 2
+  %tmp27 = trunc i32 %tmp26 to i16
+  store i16 %tmp27, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 2), align 4
+  %tmp28 = lshr i208 %tmp, 80
+  %tmp29 = shl nuw nsw i208 %tmp28, 1
+  %tmp30 = trunc i208 %tmp29 to i32
+  %tmp31 = and i32 %tmp30, 131070
+  %tmp32 = add i32 %tmp12, %tmp31
+  %tmp33 = lshr i32 %tmp32, 2
+  %tmp34 = trunc i32 %tmp33 to i16
+  store i16 %tmp34, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 3), align 2
+  br label %bb35
+
+bb35:                                             ; preds = %bb5, %bb4
+  unreachable
+}
diff --git a/test/CodeGen/X86/sext-setcc-self.ll b/test/CodeGen/X86/sext-setcc-self.ll
new file mode 100644
index 0000000..23d66a2
--- /dev/null
+++ b/test/CodeGen/X86/sext-setcc-self.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=x86-64 -mcpu=nehalem -asm-verbose=false < %s | FileCheck %s
+
+define <4 x i32> @test_ueq(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ueq <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_uge(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp uge <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ule(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ule <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_one(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp one <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ogt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ogt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_olt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp olt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index b747cc5..1de9151 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,13 +1,27 @@
-; RUN: llc < %s -march=x86    | grep and | count 2
-; RUN: llc < %s -march=x86-64 | not grep and 
+; RUN: llc < %s -mtriple=i386-apple-macosx   | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=X64
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
+; X32: t1:
+; X32-NOT: andl
+; X32: shll
+
+; X64: t1:
+; X64-NOT: andl
+; X64: shll
        %shamt = and i32 %t, 31
        %res = shl i32 %val, %shamt
        ret i32 %res
 }
 
 define i32 @t2(i32 %t, i32 %val) nounwind {
+; X32: t2:
+; X32-NOT: andl
+; X32: shll
+
+; X64: t2:
+; X64-NOT: andl
+; X64: shll
        %shamt = and i32 %t, 63
        %res = shl i32 %val, %shamt
        ret i32 %res
@@ -16,6 +30,13 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
 @X = internal global i16 0
 
 define void @t3(i16 %t) nounwind {
+; X32: t3:
+; X32-NOT: andl
+; X32: sarw
+
+; X64: t3:
+; X64-NOT: andl
+; X64: sarw
        %shamt = and i16 %t, 31
        %tmp = load i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -24,13 +45,34 @@ define void @t3(i16 %t) nounwind {
 }
 
 define i64 @t4(i64 %t, i64 %val) nounwind {
+; X64: t4:
+; X64-NOT: and
+; X64: shrq
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
 
 define i64 @t5(i64 %t, i64 %val) nounwind {
+; X64: t5:
+; X64-NOT: and
+; X64: shrq
        %shamt = and i64 %t, 191
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
+
+
+; rdar://11866926
+define i64 @t6(i64 %key, i64* nocapture %val) nounwind {
+entry:
+; X64: t6:
+; X64-NOT: movabsq
+; X64: decq
+; X64: andq
+  %shr = lshr i64 %key, 3
+  %0 = load i64* %val, align 8
+  %sub = add i64 %0, 2305843009213693951
+  %and = and i64 %sub, %shr
+  ret i64 %and
+}
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index d38f9a8..4f27e97 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {shld.*CL}
+; RUN:   grep "shld.*CL"
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   not grep {mov CL, BL}
+; RUN:   not grep "mov CL, BL"
 
 ; PR687
 
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
index 5adee7c..8d2b290 100644
--- a/test/CodeGen/X86/shift-double.ll
+++ b/test/CodeGen/X86/shift-double.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {sh\[lr\]d} | count 5
+; RUN:   grep "sh[lr]d" | count 5
 
 define i64 @test1(i64 %X, i8 %C) {
         %shift.upgrd.1 = zext i8 %C to i64              ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 3ea6011..c518cdd 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -verify-coalescing | FileCheck %s
 
 define i32* @test1(i32* %P, i32 %X) {
 ; CHECK: test1:
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index 0827221..83e1eb5 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | grep {movl	8(.esp), %eax}
-; RUN: llc < %s -march=x86 | grep {shrl	.eax}
-; RUN: llc < %s -march=x86 | grep {movswl	.ax, .eax}
+; RUN: llc < %s -march=x86 | grep "movl	8(.esp), %eax"
+; RUN: llc < %s -march=x86 | grep "shrl	.eax"
+; RUN: llc < %s -march=x86 | grep "movswl	.ax, .eax"
 
 define i32 @test1(i64 %a) nounwind {
         %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 13f9329..1479be1 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -1,8 +1,6 @@
 ; Make sure this testcase codegens to the sin and cos instructions, not calls
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
-; RUN:   grep sin\$ | count 3
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
-; RUN:   grep cos\$ | count 3
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | FileCheck %s --check-prefix=SIN
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | FileCheck %s --check-prefix=COS
 
 declare float  @sinf(float) readonly
 
@@ -10,39 +8,59 @@ declare double @sin(double) readonly
 
 declare x86_fp80 @sinl(x86_fp80) readonly
 
+; SIN: test1:
 define float @test1(float %X) {
         %Y = call float @sinf(float %X) readonly
         ret float %Y
 }
+; SIN: {{^[ \t]*fsin$}}
 
+; SIN-NOT: fsin
+
+; SIN: test2:
 define double @test2(double %X) {
         %Y = call double @sin(double %X) readonly
         ret double %Y
 }
+; SIN: {{^[ \t]*fsin$}}
+
+; SIN-NOT: fsin
 
+; SIN: test3:
 define x86_fp80 @test3(x86_fp80 %X) {
         %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
         ret x86_fp80 %Y
 }
+; SIN: {{^[ \t]*fsin$}}
 
+; SIN-NOT: fsin
+; COS-NOT: fcos
 declare float @cosf(float) readonly
 
 declare double @cos(double) readonly
 
 declare x86_fp80 @cosl(x86_fp80) readonly
 
+
+; SIN: test4:
+; COS: test3:
 define float @test4(float %X) {
         %Y = call float @cosf(float %X) readonly
         ret float %Y
 }
+; COS: {{^[ \t]*fcos}}
 
 define double @test5(double %X) {
         %Y = call double @cos(double %X) readonly
         ret double %Y
 }
+; COS: {{^[ \t]*fcos}}
 
 define x86_fp80 @test6(x86_fp80 %X) {
         %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
         ret x86_fp80 %Y
 }
+; COS: {{^[ \t]*fcos}}
 
+; SIN-NOT: fsin
+; COS-NOT: fcos
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 7957eb8..649cd61 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | FileCheck %s
 
 ; Currently, floating-point selects are lowered to CFG triangles.
 ; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll
new file mode 100644
index 0000000..c600f925
--- /dev/null
+++ b/test/CodeGen/X86/sink-out-of-loop.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+; A MOV32ri is inside a loop, it has two successors, one successor is inside the
+; same loop, the other successor is outside the loop. We should be able to sink
+; MOV32ri outside the loop.
+; rdar://11980766
+define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp {
+; CHECK: sink_succ
+; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader
+; CHECK: %exit
+; CHECK-NOT: movl
+; CHECK: jne [[OUTER_LN1]]
+; CHECK: movl
+; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2
+; CHECK: jne [[LN2]]
+; CHECK: ret
+entry:
+  br label %preheader
+
+preheader:
+  %i.127 = phi i32 [ 0, %entry ], [ %inc9, %exit ]
+  br label %for.body1.lr
+
+for.body1.lr:
+  %iv30 = phi i32 [ 1, %preheader ], [ %iv.next31, %for.inc40.i ]
+  br label %for.body1
+
+for.body1:
+  %iv.i = phi i64 [ 0, %for.body1.lr ], [ %iv.next.i, %for.body1 ]
+  %iv.next.i = add i64 %iv.i, 1
+  %lftr.wideiv32 = trunc i64 %iv.next.i to i32
+  %exitcond33 = icmp eq i32 %lftr.wideiv32, %iv30
+  br i1 %exitcond33, label %for.inc40.i, label %for.body1
+
+for.inc40.i:
+  %iv.next31 = add i32 %iv30, 1
+  %exitcond49.i = icmp eq i32 %iv.next31, 32
+  br i1 %exitcond49.i, label %exit, label %for.body1.lr
+
+exit:
+  %inc9 = add nsw i32 %i.127, 1
+  %exitcond34 = icmp eq i32 %inc9, 10
+  br i1 %exitcond34, label %for.body2, label %preheader
+
+for.body2:
+  %iv = phi i64 [ %iv.next, %for.body2 ], [ 0, %exit ]
+  %iv.next = add i64 %iv, 1
+  %lftr.wideiv = trunc i64 %iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 2048
+  br i1 %exitcond, label %for.end20, label %for.body2
+
+for.end20:
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 81a072f..980f18c 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -mcpu=nehalem | FileCheck %s
 ; rdar://7434544
 
 define <2 x i64> @t2() nounwind {
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 118e393..71a42f4 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem | FileCheck %s
 
 ; CHECK: a:
 ; CHECK: movdqu
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index d1e07c8..c99287b 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=nehalem | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
 
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 1112440..3839e87 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -promote-elements | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false  | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math  | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math  | FileCheck -check-prefix=FINITE %s
 
 ; Some of these patterns can be matched as SSE min or max. Some of
 ; then can be matched provided that the operands are swapped.
@@ -8,13 +8,10 @@
 ; and a conditional branch.
 
 ; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse}
-; x_ : use 0.0 instead of %y
-; y_ : use -0.0 instead of %y
+;  _x: use 0.0 instead of %y
+;  _y: use -0.0 instead of %y
 ; _inverse : swap the arms of the select.
 
-; Some of these tests depend on -join-physregs commuting instructions to
-; eliminate copies.
-
 ; CHECK:      ogt:
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
@@ -139,147 +136,147 @@ define double @ole_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      x_ogt:
+; CHECK:      ogt_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_ogt:
+; UNSAFE:      ogt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ogt:
+; FINITE:      ogt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ogt(double %x) nounwind {
+define double @ogt_x(double %x) nounwind {
   %c = fcmp ogt double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_olt:
+; CHECK:      olt_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_olt:
+; UNSAFE:      olt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_olt:
+; FINITE:      olt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_olt(double %x) nounwind {
+define double @olt_x(double %x) nounwind {
   %c = fcmp olt double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_ogt_inverse:
+; CHECK:      ogt_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_ogt_inverse:
+; UNSAFE:      ogt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ogt_inverse:
+; FINITE:      ogt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ogt_inverse(double %x) nounwind {
+define double @ogt_inverse_x(double %x) nounwind {
   %c = fcmp ogt double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_olt_inverse:
+; CHECK:      olt_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_olt_inverse:
+; UNSAFE:      olt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_olt_inverse:
+; FINITE:      olt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_olt_inverse(double %x) nounwind {
+define double @olt_inverse_x(double %x) nounwind {
   %c = fcmp olt double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_oge:
+; CHECK:      oge_x:
 ; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      x_oge:
+; UNSAFE:      oge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_oge:
+; FINITE:      oge_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_oge(double %x) nounwind {
+define double @oge_x(double %x) nounwind {
   %c = fcmp oge double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_ole:
+; CHECK:      ole_x:
 ; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      x_ole:
+; UNSAFE:      ole_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ole:
+; FINITE:      ole_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ole(double %x) nounwind {
+define double @ole_x(double %x) nounwind {
   %c = fcmp ole double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_oge_inverse:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      x_oge_inverse:
+; CHECK:      oge_inverse_x:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      oge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_oge_inverse:
+; FINITE:      oge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_oge_inverse(double %x) nounwind {
+define double @oge_inverse_x(double %x) nounwind {
   %c = fcmp oge double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_ole_inverse:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      x_ole_inverse:
+; CHECK:      ole_inverse_x:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ole_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ole_inverse:
+; FINITE:      ole_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ole_inverse(double %x) nounwind {
+define double @ole_inverse_x(double %x) nounwind {
   %c = fcmp ole double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
@@ -411,419 +408,419 @@ define double @ule_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      x_ugt:
+; CHECK:      ugt_x:
 ; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      x_ugt:
+; UNSAFE:      ugt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ugt:
+; FINITE:      ugt_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ugt(double %x) nounwind {
+define double @ugt_x(double %x) nounwind {
   %c = fcmp ugt double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_ult:
+; CHECK:      ult_x:
 ; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      x_ult:
+; UNSAFE:      ult_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ult:
+; FINITE:      ult_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ult(double %x) nounwind {
+define double @ult_x(double %x) nounwind {
   %c = fcmp ult double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_ugt_inverse:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      x_ugt_inverse:
+; CHECK:      ugt_inverse_x:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ugt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ugt_inverse:
+; FINITE:      ugt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ugt_inverse(double %x) nounwind {
+define double @ugt_inverse_x(double %x) nounwind {
   %c = fcmp ugt double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_ult_inverse:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      x_ult_inverse:
+; CHECK:      ult_inverse_x:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ult_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ult_inverse:
+; FINITE:      ult_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ult_inverse(double %x) nounwind {
+define double @ult_inverse_x(double %x) nounwind {
   %c = fcmp ult double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_uge:
+; CHECK:      uge_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_uge:
+; UNSAFE:      uge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_uge:
+; FINITE:      uge_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_uge(double %x) nounwind {
+define double @uge_x(double %x) nounwind {
   %c = fcmp uge double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_ule:
+; CHECK:      ule_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_ule:
+; UNSAFE:      ule_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ule:
+; FINITE:      ule_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ule(double %x) nounwind {
+define double @ule_x(double %x) nounwind {
   %c = fcmp ule double %x, 0.000000e+00
   %d = select i1 %c, double %x, double 0.000000e+00
   ret double %d
 }
 
-; CHECK:      x_uge_inverse:
+; CHECK:      uge_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_uge_inverse:
+; UNSAFE:      uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_uge_inverse:
+; FINITE:      uge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_uge_inverse(double %x) nounwind {
+define double @uge_inverse_x(double %x) nounwind {
   %c = fcmp uge double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      x_ule_inverse:
+; CHECK:      ule_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      x_ule_inverse:
+; UNSAFE:      ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      x_ule_inverse:
+; FINITE:      ule_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @x_ule_inverse(double %x) nounwind {
+define double @ule_inverse_x(double %x) nounwind {
   %c = fcmp ule double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_ogt:
+; CHECK:      ogt_y:
 ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_ogt:
+; UNSAFE:      ogt_y:
 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ogt:
+; FINITE:      ogt_y:
 ; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ogt(double %x) nounwind {
+define double @ogt_y(double %x) nounwind {
   %c = fcmp ogt double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_olt:
+; CHECK:      olt_y:
 ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_olt:
+; UNSAFE:      olt_y:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_olt:
+; FINITE:      olt_y:
 ; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_olt(double %x) nounwind {
+define double @olt_y(double %x) nounwind {
   %c = fcmp olt double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_ogt_inverse:
+; CHECK:      ogt_inverse_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_ogt_inverse:
+; UNSAFE:      ogt_inverse_y:
 ; UNSAFE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ogt_inverse:
+; FINITE:      ogt_inverse_y:
 ; FINITE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ogt_inverse(double %x) nounwind {
+define double @ogt_inverse_y(double %x) nounwind {
   %c = fcmp ogt double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_olt_inverse:
+; CHECK:      olt_inverse_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_olt_inverse:
+; UNSAFE:      olt_inverse_y:
 ; UNSAFE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_olt_inverse:
+; FINITE:      olt_inverse_y:
 ; FINITE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_olt_inverse(double %x) nounwind {
+define double @olt_inverse_y(double %x) nounwind {
   %c = fcmp olt double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_oge:
+; CHECK:      oge_y:
 ; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      y_oge:
+; UNSAFE:      oge_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_oge:
+; FINITE:      oge_y:
 ; FINITE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_oge(double %x) nounwind {
+define double @oge_y(double %x) nounwind {
   %c = fcmp oge double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_ole:
+; CHECK:      ole_y:
 ; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      y_ole:
+; UNSAFE:      ole_y:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ole:
+; FINITE:      ole_y:
 ; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ole(double %x) nounwind {
+define double @ole_y(double %x) nounwind {
   %c = fcmp ole double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_oge_inverse:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      y_oge_inverse:
+; CHECK:      oge_inverse_y:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      oge_inverse_y:
 ; UNSAFE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_oge_inverse:
+; FINITE:      oge_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_oge_inverse(double %x) nounwind {
+define double @oge_inverse_y(double %x) nounwind {
   %c = fcmp oge double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_ole_inverse:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      y_ole_inverse:
+; CHECK:      ole_inverse_y:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ole_inverse_y:
 ; UNSAFE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ole_inverse:
+; FINITE:      ole_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ole_inverse(double %x) nounwind {
+define double @ole_inverse_y(double %x) nounwind {
   %c = fcmp ole double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_ugt:
+; CHECK:      ugt_y:
 ; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      y_ugt:
+; UNSAFE:      ugt_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ugt:
+; FINITE:      ugt_y:
 ; FINITE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ugt(double %x) nounwind {
+define double @ugt_y(double %x) nounwind {
   %c = fcmp ugt double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_ult:
+; CHECK:      ult_y:
 ; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      y_ult:
+; UNSAFE:      ult_y:
 ; UNSAFE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ult:
+; FINITE:      ult_y:
 ; FINITE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ult(double %x) nounwind {
+define double @ult_y(double %x) nounwind {
   %c = fcmp ult double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_ugt_inverse:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      y_ugt_inverse:
+; CHECK:      ugt_inverse_y:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ugt_inverse_y:
 ; UNSAFE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ugt_inverse:
+; FINITE:      ugt_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ugt_inverse(double %x) nounwind {
+define double @ugt_inverse_y(double %x) nounwind {
   %c = fcmp ugt double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_ult_inverse:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      y_ult_inverse:
+; CHECK:      ult_inverse_y:
+; CHECK:      ucomisd %xmm
+; UNSAFE:      ult_inverse_y:
 ; UNSAFE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ult_inverse:
+; FINITE:      ult_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ult_inverse(double %x) nounwind {
+define double @ult_inverse_y(double %x) nounwind {
   %c = fcmp ult double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_uge:
+; CHECK:      uge_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_uge:
+; UNSAFE:      uge_y:
 ; UNSAFE-NEXT: maxsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_uge:
+; FINITE:      uge_y:
 ; FINITE-NEXT: maxsd  {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_uge(double %x) nounwind {
+define double @uge_y(double %x) nounwind {
   %c = fcmp uge double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_ule:
+; CHECK:      ule_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_ule:
+; UNSAFE:      ule_y:
 ; UNSAFE-NEXT: minsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ule:
+; FINITE:      ule_y:
 ; FINITE-NEXT: minsd  {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ule(double %x) nounwind {
+define double @ule_y(double %x) nounwind {
   %c = fcmp ule double %x, -0.000000e+00
   %d = select i1 %c, double %x, double -0.000000e+00
   ret double %d
 }
 
-; CHECK:      y_uge_inverse:
+; CHECK:      uge_inverse_y:
 ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_uge_inverse:
+; UNSAFE:      uge_inverse_y:
 ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_uge_inverse:
+; FINITE:      uge_inverse_y:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_uge_inverse(double %x) nounwind {
+define double @uge_inverse_y(double %x) nounwind {
   %c = fcmp uge double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
 }
 
-; CHECK:      y_ule_inverse:
+; CHECK:      ule_inverse_y:
 ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      y_ule_inverse:
+; UNSAFE:      ule_inverse_y:
 ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      y_ule_inverse:
+; FINITE:      ule_inverse_y:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
-define double @y_ule_inverse(double %x) nounwind {
+define double @ule_inverse_y(double %x) nounwind {
   %c = fcmp ule double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
   ret double %d
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 5ea1b4d..48638b3 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -249,9 +249,10 @@ entry:
 ; X64: 	t16:
 ; X64: 		pextrw	$8, %xmm0, %eax
 ; X64: 		pslldq	$2, %xmm0
-; X64: 		movd	%xmm0, %ecx
-; X64: 		pextrw	$1, %xmm0, %edx
-; X64: 		pinsrw	$0, %ecx, %xmm0
+; X64: 		pextrw	$1, %xmm0, %ecx
+; X64: 		movzbl	%cl, %ecx
+; X64: 		orl	%eax, %ecx
+; X64: 		pinsrw	$1, %ecx, %xmm0
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 1a1017d..a2a0deb 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
 
 ;CHECK: vsel_float
 ;CHECK: blendvps
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 54264b1..c6f9f0c 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
 
 @g16 = external global i16
 
diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll
new file mode 100644
index 0000000..076e213
--- /dev/null
+++ b/test/CodeGen/X86/sse4a.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s
+
+define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
+; CHECK: test1:
+; CHECK: movntss
+  tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
+  ret void
+}
+
+declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
+
+define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp {
+; CHECK: test2:
+; CHECK: movntsd
+  tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
+  ret void
+}
+
+declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
+
+define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp {
+; CHECK: test3:
+; CHECK: extrq
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+  ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test4:
+; CHECK: extrq
+  %1 = bitcast <2 x i64> %y to <16 x i8>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+  ret <2 x i64> %2
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test5:
+; CHECK: insertq
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
+  ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test6(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test6:
+; CHECK: insertq
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+  ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index a57fa58..fd8db3b 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
 ; CHECK: fail
 ; CHECK-NOT: fail
 
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index f6c13ec..0ddb237 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -10,11 +10,11 @@ target triple = "i686-apple-darwin8"
 define void @test({ double, double }* byval  %z, double* %P) nounwind {
 entry:
 	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
-	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
+	%tmp4 = tail call double @fabs( double %tmp3 ) readnone	; <double> [#uses=1]
         store volatile double %tmp4, double* %P
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp1 = load volatile double* %tmp, align 8		; <double> [#uses=1]
-	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
+	%tmp2 = tail call double @fabs( double %tmp1 ) readnone	; <double> [#uses=1]
     ; CHECK: andpd{{.*}}4(%esp), %xmm
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
 	store volatile double %tmp6, double* %P, align 8
diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector.ll
index fe2a9c5..c075114 100644
--- a/test/CodeGen/X86/stack-protector-linux.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs:
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs:
 ; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_guard}
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_fail}
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard"
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail"
 
 @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"		; <[11 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 8313166..6e4fe90 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
 
 target datalayout = "e-p:32:32"
         %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index a297728..4f31ab5 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {leal	.*), %e.\*} | count 1
+; RUN: llc < %s -march=x86-64 | grep "leal	.*), %e.*" | count 1
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 ; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index 0ea5541..0693789 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -5,7 +5,7 @@
 ; RUN: not grep negq %t
 ; RUN: not grep addq %t
 ; RUN: not grep subq %t
-; RUN: not grep {movl	%} %t
+; RUN: not grep "movl	%" %t
 
 ; Utilize implicit zero-extension on x86-64 to eliminate explicit
 ; zero-extensions. Shrink 64-bit adds to 32-bit when the high
diff --git a/test/CodeGen/X86/switch-order-weight.ll b/test/CodeGen/X86/switch-order-weight.ll
new file mode 100644
index 0000000..0fdd56d
--- /dev/null
+++ b/test/CodeGen/X86/switch-order-weight.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=x86_64-apple-darwin11 < %s | FileCheck %s
+
+; Check that the cases which lead to unreachable are checked after "10"
+
+define void @test1(i32 %x) nounwind uwtable ssp {
+entry:
+  switch i32 %x, label %if.end7 [
+    i32 0, label %if.then
+    i32 10, label %if.then2
+    i32 20, label %if.then5
+  ]
+
+; CHECK: test1:
+; CHECK-NOT: unr
+; CHECK: cmpl $10
+; CHECK: bar
+; CHECK: cmpl $20
+
+if.then:
+  tail call void @unr(i32 23) noreturn nounwind
+  unreachable
+
+if.then2:
+  tail call void @bar(i32 42) nounwind
+  br label %if.end7
+
+if.then5:
+  tail call void @unr(i32 5) noreturn nounwind
+  unreachable
+
+if.end7:
+  ret void
+}
+
+declare void @unr(i32) noreturn
+
+declare void @bar(i32)
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
new file mode 100644
index 0000000..7030753
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.4.0"
+
+declare i64 @testi()
+
+define i64 @test_trivial() {
+ %A = tail call i64 @testi()
+ ret i64 %A
+}
+; CHECK: test_trivial:
+; CHECK: jmp	_testi                  ## TAILCALL
+
+
+define i64 @test_noop_bitcast() {
+ %A = tail call i64 @testi()
+ %B = bitcast i64 %A to i64
+ ret i64 %B
+}
+; CHECK: test_noop_bitcast:
+; CHECK: jmp	_testi                  ## TAILCALL
+
+
+; Tail call shouldn't be blocked by no-op inttoptr.
+define i8* @test_inttoptr() {
+  %A = tail call i64 @testi()
+  %B = inttoptr i64 %A to i8*
+  ret i8* %B
+}
+
+; CHECK: test_inttoptr:
+; CHECK: jmp	_testi                  ## TAILCALL
+
+
+declare <4 x float> @testv()
+
+define <4 x i32> @test_vectorbitcast() {
+  %A = tail call <4 x float> @testv()
+  %B = bitcast <4 x float> %A to <4 x i32>
+  ret <4 x i32> %B
+}
+; CHECK: test_vectorbitcast:
+; CHECK: jmp	_testv                  ## TAILCALL
+
+
+declare { i64, i64 } @testp()
+
+define {i64, i64} @test_pair_trivial() {
+  %A = tail call { i64, i64} @testp()
+  ret { i64, i64} %A
+}
+; CHECK: test_pair_trivial:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+
+
+define {i64, i64} @test_pair_trivial_extract() {
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %b = insertvalue {i64, i64} undef, i64 %x, 0
+  %c = insertvalue {i64, i64} %b, i64 %y, 1
+  
+  ret { i64, i64} %c
+}
+
+; CHECK: test_pair_trivial_extract:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+define {i8*, i64} @test_pair_conv_extract() {
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %x1 = inttoptr i64 %x to i8*
+  
+  %b = insertvalue {i8*, i64} undef, i8* %x1, 0
+  %c = insertvalue {i8*, i64} %b, i64 %y, 1
+  
+  ret { i8*, i64} %c
+}
+
+; CHECK: test_pair_conv_extract:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+
+
+; PR13006
+define { i64, i64 } @crash(i8* %this) {
+  %c = tail call { i64, i64 } @testp()
+  %mrv7 = insertvalue { i64, i64 } %c, i64 undef, 1
+  ret { i64, i64 } %mrv7
+}
+
+
diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll
new file mode 100644
index 0000000..a80b90f
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Teach CGP to dup returns to enable tail call optimization.
+; rdar://9147433
+
+define i32 @foo(i32 %x) nounwind ssp {
+; CHECK: foo:
+entry:
+  switch i32 %x, label %return [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb5
+    i32 5, label %sw.bb7
+    i32 6, label %sw.bb9
+  ]
+
+sw.bb:                                            ; preds = %entry
+; CHECK: jmp _f1
+  %call = tail call i32 @f1() nounwind
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+; CHECK: jmp _f2
+  %call2 = tail call i32 @f2() nounwind
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+; CHECK: jmp _f3
+  %call4 = tail call i32 @f3() nounwind
+  br label %return
+
+sw.bb5:                                           ; preds = %entry
+; CHECK: jmp _f4
+  %call6 = tail call i32 @f4() nounwind
+  br label %return
+
+sw.bb7:                                           ; preds = %entry
+; CHECK: jmp _f5
+  %call8 = tail call i32 @f5() nounwind
+  br label %return
+
+sw.bb9:                                           ; preds = %entry
+; CHECK: jmp _f6
+  %call10 = tail call i32 @f6() nounwind
+  br label %return
+
+return:                                           ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
+  %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i32 @f1()
+
+declare i32 @f2()
+
+declare i32 @f3()
+
+declare i32 @f4()
+
+declare i32 @f5()
+
+declare i32 @f6()
+
+; rdar://11958338
+%0 = type opaque
+
+declare i8* @bar(i8*) uwtable optsize noinline ssp
+
+define hidden %0* @thingWithValue(i8* %self) uwtable ssp {
+entry:
+; CHECK: thingWithValue:
+; CHECK: jmp _bar
+  br i1 undef, label %if.then.i, label %if.else.i
+
+if.then.i:                                        ; preds = %entry
+  br label %someThingWithValue.exit
+
+if.else.i:                                        ; preds = %entry
+  %call4.i = tail call i8* @bar(i8* undef) optsize
+  br label %someThingWithValue.exit
+
+someThingWithValue.exit:                          ; preds = %if.else.i, %if.then.i
+  %retval.0.in.i = phi i8* [ undef, %if.then.i ], [ %call4.i, %if.else.i ]
+  %retval.0.i = bitcast i8* %retval.0.in.i to %0*
+  ret %0* %retval.0.i
+}
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
deleted file mode 100644
index 8ef1f11..0000000
--- a/test/CodeGen/X86/tailcall-i1.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-define fastcc i1 @i1test(i32, i32, i32, i32) {
-  entry:
-  %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
-  ret i1 %4
-}
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c3f4278..e9b8721 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() {
 ;  CHECK: pushq
 ; Pass the stack argument.
 ;  CHECK: movl $7, 16(%rsp)
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction.  Put it into a register which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+;  CHECK: movabsq $manyargs_callee, %rax
 ; Pass the register arguments, in the right registers.
 ;  CHECK: movl $1, %edi
 ;  CHECK: movl $2, %esi
@@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() {
 ;  CHECK: movl $4, %ecx
 ;  CHECK: movl $5, %r8d
 ;  CHECK: movl $6, %r9d
-; This is the large code model, so &manyargs_callee may not fit into
-; the jmp instruction.  Put it into R11, which won't be clobbered
-; while restoring callee-saved registers and won't be used for passing
-; arguments.
-;  CHECK: movabsq $manyargs_callee, %rax
 ; Adjust the stack to "return".
 ;  CHECK: popq
 ; And tail-call to the target.
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
deleted file mode 100644
index 4e578d1..0000000
--- a/test/CodeGen/X86/tailcall-void.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-define fastcc void @i1test(i32, i32, i32, i32) {
-  entry:
-   tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
-   ret void 
-}
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall.ll
index f7ff5d5..36a38e0 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 5
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 7
 
 ; With -tailcallopt, CodeGen guarantees a tail call optimization
 ; for all of these.
@@ -38,3 +38,15 @@ define fastcc i32 @noret() nounwind {
   tail call fastcc void @does_not_return()
   unreachable
 }
+
+define fastcc void @void_test(i32, i32, i32, i32) {
+  entry:
+   tail call fastcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+   ret void 
+}
+
+define fastcc i1 @i1test(i32, i32, i32, i32) {
+  entry:
+  %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+  ret i1 %4
+}
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 03d6f94..118eee6 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1
 %struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/targetLoweringGeneric.ll b/test/CodeGen/X86/targetLoweringGeneric.ll
new file mode 100644
index 0000000..ba5f8f8
--- /dev/null
+++ b/test/CodeGen/X86/targetLoweringGeneric.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
+
+; Gather non-machine specific tests for the transformations in
+; CodeGen/SelectionDAG/TargetLowering.  Currently, these
+; can't be tested easily by checking the SDNodes that are
+; the data structures that these transformations act on.
+; Therefore, use X86 assembler output to check against.
+
+; rdar://11195364 A problem with the transformation:
+;  If all of the demanded bits on one side are known, and all of the set
+;  bits on that side are also known to be set on the other side, turn this
+;  into an AND, as we know the bits will be cleared.
+; The known set (one) bits for the arguments %xor1 are not the same, so the
+; transformation should not occur
+define void @foo(i32 %i32In1, i32 %i32In2, i32 %i32In3, i32 %i32In4, 
+                 i32 %i32In5, i32 %i32In6, i32* %i32StarOut, i1 %i1In1, 
+                 i32* %i32SelOut) nounwind {
+    %and3 = and i32 %i32In1, 1362779777
+    %or2 = or i32 %i32In2, %i32In3
+    %and2 = and i32 %or2, 1362779777
+    %xor3 = xor i32 %and3, %and2
+    ; CHECK: shll
+    %shl1 = shl i32 %xor3, %i32In4
+    %sub1 = sub i32 %or2, %shl1
+    %add1 = add i32 %sub1, %i32In5
+    %and1 = and i32 %add1, 1
+    %xor2 = xor i32 %and1, 1
+    %or1 = or i32 %xor2, 364806994 ;0x15BE8352
+    ; CHECK-NOT: andl $96239955
+    %xor1 = xor i32 %or1, 268567040 ;0x10020200
+    ; force an output so not DCE'd
+    store i32 %xor1, i32* %i32StarOut
+    ; force not fast isel by using a select
+    %i32SelVal = select i1 %i1In1, i32 %i32In1, i32 %xor1
+    store i32 %i32SelVal, i32* %i32SelOut
+    ; CHECK: ret
+    ret void
+}
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
index a7be483..0507cb8 100644
--- a/test/CodeGen/X86/thiscall-struct-return.ll
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -10,7 +10,7 @@ declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result
 
 define void @testv() nounwind {
 ; CHECK: testv:
-; CHECK: leal
+; CHECK: leal 16(%esp), %esi
 ; CHECK-NEXT: movl	%esi, (%esp)
 ; CHECK-NEXT: calll _ZN1CC1Ev
 ; CHECK: leal 8(%esp), %eax
@@ -29,7 +29,7 @@ entry:
 
 define void @test2v() nounwind {
 ; CHECK: test2v:
-; CHECK: leal
+; CHECK: leal 16(%esp), %esi
 ; CHECK-NEXT: movl	%esi, (%esp)
 ; CHECK-NEXT: calll _ZN1CC1Ev
 ; CHECK: leal 8(%esp), %eax
diff --git a/test/CodeGen/X86/tls-local-dynamic.ll b/test/CodeGen/X86/tls-local-dynamic.ll
new file mode 100644
index 0000000..c5fd16b
--- /dev/null
+++ b/test/CodeGen/X86/tls-local-dynamic.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck  %s
+
+@x = internal thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+; get_x and get_y are here to prevent x and y to be optimized away as 0
+
+define i32* @get_x() {
+entry:
+  ret i32* @x
+; FIXME: This function uses a single thread-local variable,
+; so we might want to fall back to general-dynamic here.
+; CHECK:       get_x:
+; CHECK:       leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT:  callq __tls_get_addr@PLT
+; CHECK:       x@DTPOFF
+}
+
+define i32* @get_y() {
+entry:
+  ret i32* @y
+}
+
+define i32 @f(i32 %i) {
+entry:
+  %cmp = icmp eq i32 %i, 1
+  br i1 %cmp, label %return, label %if.else
+; This bb does not access TLS, so should not call __tls_get_addr.
+; CHECK:       f:
+; CHECK-NOT:   __tls_get_addr
+; CHECK:       je
+
+
+if.else:
+  %0 = load i32* @x, align 4
+  %cmp1 = icmp eq i32 %i, 2
+  br i1 %cmp1, label %if.then2, label %return
+; Now we call __tls_get_addr.
+; CHECK:       # %if.else
+; CHECK:       leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT:  callq __tls_get_addr@PLT
+; CHECK:       x@DTPOFF
+
+
+if.then2:
+  %1 = load i32* @y, align 4
+  %add = add nsw i32 %1, %0
+  br label %return
+; This accesses TLS, but is dominated by the previous block,
+; so should not have to call __tls_get_addr again.
+; CHECK:       # %if.then2
+; CHECK-NOT:   __tls_get_addr
+; CHECK:       y@DTPOFF
+
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll
new file mode 100644
index 0000000..7c527e2
--- /dev/null
+++ b/test/CodeGen/X86/tls-models.ll
@@ -0,0 +1,166 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64_PIC %s
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32_PIC %s
+
+; Darwin always uses the same model.
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN %s
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+  ret i32* @external_gd
+
+  ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+  ; X64:     f1:
+  ; X64:     external_gd@GOTTPOFF
+  ; X32:     f1:
+  ; X32:     external_gd@INDNTPOFF
+  ; X64_PIC: f1:
+  ; X64_PIC: external_gd@TLSGD
+  ; X32_PIC: f1:
+  ; X32_PIC: external_gd@TLSGD
+  ; DARWIN:  f1:
+  ; DARWIN:  _external_gd@TLVP
+}
+
+define i32* @f2() {
+entry:
+  ret i32* @internal_gd
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic.
+  ; X64:     f2:
+  ; X64:     internal_gd@TPOFF
+  ; X32:     f2:
+  ; X32:     internal_gd@NTPOFF
+  ; X64_PIC: f2:
+  ; X64_PIC: internal_gd@TLSLD
+  ; X32_PIC: f2:
+  ; X32_PIC: internal_gd@TLSLDM
+  ; DARWIN:  f2:
+  ; DARWIN:  _internal_gd@TLVP
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+  ret i32* @external_ld
+
+  ; Non-PIC code can use initial exec, PIC code use local dynamic as specified.
+  ; X64:     f3:
+  ; X64:     external_ld@GOTTPOFF
+  ; X32:     f3:
+  ; X32:     external_ld@INDNTPOFF
+  ; X64_PIC: f3:
+  ; X64_PIC: external_ld@TLSLD
+  ; X32_PIC: f3:
+  ; X32_PIC: external_ld@TLSLDM
+  ; DARWIN:  f3:
+  ; DARWIN:  _external_ld@TLVP
+}
+
+define i32* @f4() {
+entry:
+  ret i32* @internal_ld
+
+  ; Non-PIC code can use local exec, PIC code can use local dynamic.
+  ; X64:     f4:
+  ; X64:     internal_ld@TPOFF
+  ; X32:     f4:
+  ; X32:     internal_ld@NTPOFF
+  ; X64_PIC: f4:
+  ; X64_PIC: internal_ld@TLSLD
+  ; X32_PIC: f4:
+  ; X32_PIC: internal_ld@TLSLDM
+  ; DARWIN:  f4:
+  ; DARWIN:  _internal_ld@TLVP
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+  ret i32* @external_ie
+
+  ; Non-PIC and PIC code will use initial exec as specified.
+  ; X64:     f5:
+  ; X64:     external_ie@GOTTPOFF
+  ; X32:     f5:
+  ; X32:     external_ie@INDNTPOFF
+  ; X64_PIC: f5:
+  ; X64_PIC: external_ie@GOTTPOFF
+  ; X32_PIC: f5:
+  ; X32_PIC: external_ie@GOTNTPOFF
+  ; DARWIN:  f5:
+  ; DARWIN:  _external_ie@TLVP
+}
+
+define i32* @f6() {
+entry:
+  ret i32* @internal_ie
+
+  ; Non-PIC code can use local exec, PIC code use initial exec as specified.
+  ; X64:     f6:
+  ; X64:     internal_ie@TPOFF
+  ; X32:     f6:
+  ; X32:     internal_ie@NTPOFF
+  ; X64_PIC: f6:
+  ; X64_PIC: internal_ie@GOTTPOFF
+  ; X32_PIC: f6:
+  ; X32_PIC: internal_ie@GOTNTPOFF
+  ; DARWIN:  f6:
+  ; DARWIN:  _internal_ie@TLVP
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+  ret i32* @external_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; X64:     f7:
+  ; X64:     external_le@TPOFF
+  ; X32:     f7:
+  ; X32:     external_le@NTPOFF
+  ; X64_PIC: f7:
+  ; X64_PIC: external_le@TPOFF
+  ; X32_PIC: f7:
+  ; X32_PIC: external_le@NTPOFF
+  ; DARWIN:  f7:
+  ; DARWIN:  _external_le@TLVP
+}
+
+define i32* @f8() {
+entry:
+  ret i32* @internal_le
+
+  ; Non-PIC and PIC code will use local exec as specified.
+  ; X64:     f8:
+  ; X64:     internal_le@TPOFF
+  ; X32:     f8:
+  ; X32:     internal_le@NTPOFF
+  ; X64_PIC: f8:
+  ; X64_PIC: internal_le@TPOFF
+  ; X32_PIC: f8:
+  ; X32_PIC: internal_le@NTPOFF
+  ; DARWIN:  f8:
+  ; DARWIN:  _internal_le@TLVP
+}
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index b83416d..51c3d23 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
 
 @i = thread_local global i32 15
+@j = internal thread_local global i32 42
+@k = internal thread_local global i32 42
 
 define i32 @f1() {
 entry:
@@ -64,4 +66,22 @@ entry:
 ; X64:   callq __tls_get_addr@PLT
 
 
+define i32 @f5() nounwind {
+entry:
+	%0 = load i32* @j, align 4
+	%1 = load i32* @k, align 4
+	%add = add nsw i32 %0, %1
+	ret i32 %add
+}
 
+; X32:    f5:
+; X32:      leal {{[jk]}}@TLSLDM(%ebx)
+; X32-NEXT: calll ___tls_get_addr@PLT
+; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
+; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
+
+; X64:    f5:
+; X64:      leaq {{[jk]}}@TLSLD(%rip), %rdi
+; X64-NEXT: callq	__tls_get_addr@PLT
+; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
+; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index e2e58a54..3fca9f5 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: llc < %s -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
 ; RUN:   | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: llc < %s -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
 ; RUN:   | FileCheck -check-prefix=X64 %s
 
 @i = thread_local global i32 15
@@ -35,7 +35,12 @@ entry:
 
 define i32 @f3() {
 ; X32: f3:
-; X32:      movl i2@INDNTPOFF, %eax
+; X32:      calll .L{{[0-9]+}}$pb
+; X32-NEXT: .L{{[0-9]+}}$pb:
+; X32-NEXT: popl %eax
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %eax
+; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax
 ; X32-NEXT: movl %gs:(%eax), %eax
 ; X32-NEXT: ret
 ; X64: f3:
@@ -50,8 +55,13 @@ entry:
 
 define i32* @f4() {
 ; X32: f4:
-; X32:      movl %gs:0, %eax
-; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32:      calll .L{{[0-9]+}}$pb
+; X32-NEXT: .L{{[0-9]+}}$pb:
+; X32-NEXT: popl %ecx
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %ecx
+; X32-NEXT: movl %gs:0, %eax
+; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax
 ; X32-NEXT: ret
 ; X64: f4:
 ; X64:      movq %fs:0, %rax
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 03ae6bf..3f44be0 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,9 +1,21 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
-define i32 @test() noreturn nounwind  {
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+; CHECK: test0:
+; CHECK: ud2
+define i32 @test0() noreturn nounwind  {
 entry:
 	tail call void @llvm.trap( )
 	unreachable
 }
 
+; CHECK: test1:
+; CHECK: int3
+define i32 @test1() noreturn nounwind  {
+entry:
+	tail call void @llvm.debugtrap( )
+	unreachable
+}
+
 declare void @llvm.trap() nounwind 
+declare void @llvm.debugtrap() nounwind 
 
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index 57d6e97..9877d7b 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
 
 ;CHECK: load_2_i8
 ; A single 16-bit load
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 6f16a25..af6d47a 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
-; RUN:   grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \
+; RUN:   grep "twoaddrinstr" | grep "Number of instructions aggressively commuted"
 ; rdar://6480363
 
 target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 077fee0..513c304 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk"
 
 define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
index 41ee194..0536eb0 100644
--- a/test/CodeGen/X86/uint_to_fp.ll
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep "sub.*esp"
 ; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
 ; rdar://6034396
 
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 7416051..56fdadb 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {jc} | count 1
+; RUN: llc < %s -march=x86 | grep "jc" | count 1
 ; XFAIL: *
 
 ; FIXME: umul-with-overflow not supported yet.
diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll
new file mode 100644
index 0000000..a438723
--- /dev/null
+++ b/test/CodeGen/X86/unwindraise.ll
@@ -0,0 +1,252 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR13188
+;
+; The _Unwind_RaiseException function can return normally and via eh.return.
+; This causes confusion about the function live-out registers, since the two
+; different ways of returning have different return values.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-freebsd9.0"
+
+%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i64, i64, i64, [18 x i8] }
+%struct.dwarf_eh_bases = type { i8*, i8*, i8* }
+%struct._Unwind_FrameState = type { %struct.frame_state_reg_info, i64, i64, i8*, i32, i8*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i64, i64, i64, i8, i8, i8, i8, i8* }
+%struct.frame_state_reg_info = type { [18 x %struct.anon], %struct.frame_state_reg_info* }
+%struct.anon = type { %union.anon, i32 }
+%union.anon = type { i64 }
+%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i64, i64 }
+
+@dwarf_reg_size_table = external hidden unnamed_addr global [18 x i8], align 16
+
+declare void @abort() noreturn
+
+declare fastcc i32 @uw_frame_state_for(%struct._Unwind_Context*, %struct._Unwind_FrameState*) uwtable
+
+define hidden i32 @_Unwind_RaiseException(%struct._Unwind_Exception* %exc) uwtable {
+entry:
+  %fs.i = alloca %struct._Unwind_FrameState, align 8
+  %this_context = alloca %struct._Unwind_Context, align 8
+  %cur_context = alloca %struct._Unwind_Context, align 8
+  %fs = alloca %struct._Unwind_FrameState, align 8
+  call void @llvm.eh.unwind.init()
+  %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+  %1 = call i8* @llvm.returnaddress(i32 0)
+  call fastcc void @uw_init_context_1(%struct._Unwind_Context* %this_context, i8* %0, i8* %1)
+  %2 = bitcast %struct._Unwind_Context* %cur_context to i8*
+  %3 = bitcast %struct._Unwind_Context* %this_context to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
+  %personality = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 6
+  %retaddr_column.i = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 9
+  %flags.i.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 5
+  %ra.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 2
+  %exception_class = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 0
+  br label %while.body
+
+while.body:                                       ; preds = %uw_update_context.exit, %entry
+  %call = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
+  switch i32 %call, label %do.end21 [
+    i32 5, label %do.end21.loopexit46
+    i32 0, label %if.end3
+  ]
+
+if.end3:                                          ; preds = %while.body
+  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8, !tbaa !0
+  %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
+  br i1 %tobool, label %if.end13, label %if.then4
+
+if.then4:                                         ; preds = %if.end3
+  %5 = load i64* %exception_class, align 8, !tbaa !3
+  %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
+  switch i32 %call6, label %do.end21.loopexit46 [
+    i32 6, label %while.end
+    i32 8, label %if.end13
+  ]
+
+if.end13:                                         ; preds = %if.then4, %if.end3
+  call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
+  %6 = load i64* %retaddr_column.i, align 8, !tbaa !3
+  %conv.i = trunc i64 %6 to i32
+  %cmp.i.i.i = icmp slt i32 %conv.i, 18
+  br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
+
+cond.true.i.i.i:                                  ; preds = %if.end13
+  call void @abort() noreturn
+  unreachable
+
+cond.end.i.i.i:                                   ; preds = %if.end13
+  %sext.i = shl i64 %6, 32
+  %idxprom.i.i.i = ashr exact i64 %sext.i, 32
+  %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
+  %7 = load i8* %arrayidx.i.i.i, align 1, !tbaa !1
+  %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
+  %8 = load i8** %arrayidx2.i.i.i, align 8, !tbaa !0
+  %9 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %and.i.i.i.i = and i64 %9, 4611686018427387904
+  %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
+  br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
+
+land.lhs.true.i.i.i:                              ; preds = %cond.end.i.i.i
+  %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
+  %10 = load i8* %arrayidx4.i.i.i, align 1, !tbaa !1
+  %tobool6.i.i.i = icmp eq i8 %10, 0
+  br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %land.lhs.true.i.i.i
+  %11 = ptrtoint i8* %8 to i64
+  br label %uw_update_context.exit
+
+if.end.i.i.i:                                     ; preds = %land.lhs.true.i.i.i, %cond.end.i.i.i
+  %cmp8.i.i.i = icmp eq i8 %7, 8
+  br i1 %cmp8.i.i.i, label %if.then10.i.i.i, label %cond.true14.i.i.i
+
+if.then10.i.i.i:                                  ; preds = %if.end.i.i.i
+  %12 = bitcast i8* %8 to i64*
+  %13 = load i64* %12, align 8, !tbaa !3
+  br label %uw_update_context.exit
+
+cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
+  call void @abort() noreturn
+  unreachable
+
+uw_update_context.exit:                           ; preds = %if.then10.i.i.i, %if.then.i.i.i
+  %retval.0.i.i.i = phi i64 [ %11, %if.then.i.i.i ], [ %13, %if.then10.i.i.i ]
+  %14 = inttoptr i64 %retval.0.i.i.i to i8*
+  store i8* %14, i8** %ra.i, align 8, !tbaa !0
+  br label %while.body
+
+while.end:                                        ; preds = %if.then4
+  %private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2
+  store i64 0, i64* %private_1, align 8, !tbaa !3
+  %15 = load i8** %ra.i, align 8, !tbaa !0
+  %16 = ptrtoint i8* %15 to i64
+  %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3
+  store i64 %16, i64* %private_2, align 8, !tbaa !3
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
+  %17 = bitcast %struct._Unwind_FrameState* %fs.i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %17)
+  %personality.i = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 6
+  %retaddr_column.i22 = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 9
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %uw_update_context.exit44, %while.end
+  %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
+  %18 = load i8** %ra.i, align 8, !tbaa !0
+  %19 = ptrtoint i8* %18 to i64
+  %20 = load i64* %private_2, align 8, !tbaa !3
+  %cmp.i = icmp eq i64 %19, %20
+  %cmp2.i = icmp eq i32 %call.i, 0
+  br i1 %cmp2.i, label %if.end.i, label %do.end21
+
+if.end.i:                                         ; preds = %while.body.i
+  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8, !tbaa !0
+  %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
+  br i1 %tobool.i, label %if.end12.i, label %if.then3.i
+
+if.then3.i:                                       ; preds = %if.end.i
+  %or.i = select i1 %cmp.i, i32 6, i32 2
+  %22 = load i64* %exception_class, align 8, !tbaa !3
+  %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
+  switch i32 %call5.i, label %do.end21 [
+    i32 7, label %do.body19
+    i32 8, label %if.end12.i
+  ]
+
+if.end12.i:                                       ; preds = %if.then3.i, %if.end.i
+  br i1 %cmp.i, label %cond.true.i, label %cond.end.i
+
+cond.true.i:                                      ; preds = %if.end12.i
+  call void @abort() noreturn
+  unreachable
+
+cond.end.i:                                       ; preds = %if.end12.i
+  call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
+  %23 = load i64* %retaddr_column.i22, align 8, !tbaa !3
+  %conv.i23 = trunc i64 %23 to i32
+  %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
+  br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
+
+cond.true.i.i.i25:                                ; preds = %cond.end.i
+  call void @abort() noreturn
+  unreachable
+
+cond.end.i.i.i33:                                 ; preds = %cond.end.i
+  %sext.i26 = shl i64 %23, 32
+  %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
+  %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
+  %24 = load i8* %arrayidx.i.i.i28, align 1, !tbaa !1
+  %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
+  %25 = load i8** %arrayidx2.i.i.i29, align 8, !tbaa !0
+  %26 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %and.i.i.i.i31 = and i64 %26, 4611686018427387904
+  %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
+  br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
+
+land.lhs.true.i.i.i36:                            ; preds = %cond.end.i.i.i33
+  %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
+  %27 = load i8* %arrayidx4.i.i.i34, align 1, !tbaa !1
+  %tobool6.i.i.i35 = icmp eq i8 %27, 0
+  br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
+
+if.then.i.i.i37:                                  ; preds = %land.lhs.true.i.i.i36
+  %28 = ptrtoint i8* %25 to i64
+  br label %uw_update_context.exit44
+
+if.end.i.i.i39:                                   ; preds = %land.lhs.true.i.i.i36, %cond.end.i.i.i33
+  %cmp8.i.i.i38 = icmp eq i8 %24, 8
+  br i1 %cmp8.i.i.i38, label %if.then10.i.i.i40, label %cond.true14.i.i.i41
+
+if.then10.i.i.i40:                                ; preds = %if.end.i.i.i39
+  %29 = bitcast i8* %25 to i64*
+  %30 = load i64* %29, align 8, !tbaa !3
+  br label %uw_update_context.exit44
+
+cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
+  call void @abort() noreturn
+  unreachable
+
+uw_update_context.exit44:                         ; preds = %if.then10.i.i.i40, %if.then.i.i.i37
+  %retval.0.i.i.i42 = phi i64 [ %28, %if.then.i.i.i37 ], [ %30, %if.then10.i.i.i40 ]
+  %31 = inttoptr i64 %retval.0.i.i.i42 to i8*
+  store i8* %31, i8** %ra.i, align 8, !tbaa !0
+  br label %while.body.i
+
+do.body19:                                        ; preds = %if.then3.i
+  call void @llvm.lifetime.end(i64 -1, i8* %17)
+  %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
+  %32 = load i8** %ra.i, align 8, !tbaa !0
+  call void @llvm.eh.return.i64(i64 %call20, i8* %32)
+  unreachable
+
+do.end21.loopexit46:                              ; preds = %if.then4, %while.body
+  %retval.0.ph = phi i32 [ 3, %if.then4 ], [ 5, %while.body ]
+  br label %do.end21
+
+do.end21:                                         ; preds = %do.end21.loopexit46, %if.then3.i, %while.body.i, %while.body
+  %retval.0 = phi i32 [ %retval.0.ph, %do.end21.loopexit46 ], [ 3, %while.body ], [ 2, %while.body.i ], [ 2, %if.then3.i ]
+  ret i32 %retval.0
+}
+
+declare void @llvm.eh.unwind.init() nounwind
+
+declare fastcc void @uw_init_context_1(%struct._Unwind_Context*, i8*, i8*) uwtable
+
+declare i8* @llvm.eh.dwarf.cfa(i32) nounwind
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare fastcc i64 @uw_install_context_1(%struct._Unwind_Context*, %struct._Unwind_Context*) uwtable
+
+declare void @llvm.eh.return.i64(i64, i8*) nounwind
+
+declare fastcc void @uw_update_context_1(%struct._Unwind_Context*, %struct._Unwind_FrameState* nocapture) uwtable
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index ae3f55a..569586a 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -1,9 +1,16 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
 
 %vec = type <6 x float>
 ; CHECK: divss
 ; CHECK: divss
 ; CHECK: divps
+
+; Scheduler causes a different instruction order to be produced on Intel Atom
+; ATOM: divps
+; ATOM: divss
+; ATOM: divss
+
 define %vec @vecdiv( %vec %p1, %vec %p2)
 {
   %result = fdiv %vec %p1, %p2
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index f2fc7e7..e0862ca 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
-; RUN:   grep {subl.*60}
+; RUN:   grep "subl.*60"
 ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
-; RUN:   grep {movaps.*32}
+; RUN:   grep "movaps.*32"
 
 
 define void @test() {
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
new file mode 100644
index 0000000..08eb16f
--- /dev/null
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;CHECK: foo1_8
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <8 x float> @foo1_8(<8 x i8> %src) {
+  %res = sitofp <8 x i8> %src to <8 x float>
+  ret <8 x float> %res
+}
+
+;CHECK: foo1_4
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <4 x float> @foo1_4(<4 x i8> %src) {
+  %res = sitofp <4 x i8> %src to <4 x float>
+  ret <4 x float> %res
+}
+
+;CHECK: foo2_8
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <8 x float> @foo2_8(<8 x i8> %src) {
+  %res = uitofp <8 x i8> %src to <8 x float>
+  ret <8 x float> %res
+}
+
+;CHECK: foo2_4
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <4 x float> @foo2_4(<4 x i8> %src) {
+  %res = uitofp <4 x i8> %src to <4 x float>
+  ret <4 x float> %res
+}
+
+;CHECK: foo3_8
+;CHECK: vcvttps2dq
+;CHECK: ret
+define <8 x i8> @foo3_8(<8 x float> %src) {
+  %res = fptosi <8 x float> %src to <8 x i8>
+  ret <8 x i8> %res
+}
+;CHECK: foo3_4
+;CHECK: vcvttps2dq
+;CHECK: ret
+define <4 x i8> @foo3_4(<4 x float> %src) {
+  %res = fptosi <4 x float> %src to <4 x i8>
+  ret <4 x i8> %res
+}
+
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 91777f7..46d6a23 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
 entry:
 ; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: movzwl
-; CHECK: movzwl
+; CHECK: punpcklwd
 ; CHECK: pshufd
 ; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 39c9b77..367dd27 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s
 
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index 2951193..565be7a 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep "(%esp,%eax,4)" | count 4
 
 ; Inserts and extracts with variable indices must be lowered
 ; to memory accesses.
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index de3b36f..2a4864a 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
 
 define <4 x float> @t3(<4 x float>* %P) nounwind  {
 	%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index ada17e0..d1d7608 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -o %t
 ; RUN: grep pshufd %t | count 2
 
 define <4 x float> @test(float %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 3656e5f..b8ec0cf 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 | grep "movlhps.*%xmm0, %xmm0"
 
 define <2 x i64> @test3(i64 %A) nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 06f38ed..09d4c1a 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
 
 ; sse:  t1:
 ; sse2: t1:
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 861a1cc..b26f920 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
 ; PR2485
 
 define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind  {
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index dec98c7..0aff822 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
 
 ; ModuleID = 'vec_shuffle-27.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -35,4 +35,4 @@ entry:
   store <4 x i64> %vect1487, <4 x i64>* %ap
   store <4 x i64> %vect1488, <4 x i64>* %bp
   ret void;
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
index 7f0fcb5..f5083b4 100644
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
-; RUN: grep pextrw %t | count 13
-; RUN: grep pinsrw %t | count 14
+; RUN: grep pextrw %t | count 12
+; RUN: grep pinsrw %t | count 13
 ; RUN: grep rolw %t | count 13
 ; RUN: not grep esp %t
 ; RUN: not grep ebp %t
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 8090afc..9a06015 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
 
 define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 ; CHECK: pshufb
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 430aa04..ed285f9 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=core2 | FileCheck %s
 ; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 96ef883..ec196df 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
 
 define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp {
 ; CHECK: unpcklpd
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
index 55531e3..ee8d2d5 100644
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
 ; rdar://10050222, rdar://10134392
 
 define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index cde5ae9..f105de4 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd | count 1
 
 define void @test(<2 x i64>* %P, i8 %x) nounwind {
 	%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0		; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index 649b85c..feacc42 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
 ; RUN: grep punpcklwd %t | count 4
 ; RUN: grep punpckhwd %t | count 4
 ; RUN: grep "pshufd" %t | count 8
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
index d9941e6..374acfa 100644
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
 ; RUN: grep punpcklbw %t | count 16
 ; RUN: grep punpckhbw %t | count 16
 ; RUN: grep "pshufd" %t | count 16
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index a87fbd0..24d8487 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
-; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse3 | grep movddup
 
 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 3bd3f7b..c294df5 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -70,3 +70,17 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; CHECK: call
 ; CHECK: roundss $4, %xmm{{.*}}, %xmm0
 }
+
+; PR13576 
+define  <2 x double> @test5() nounwind uwtable readnone noinline {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
+4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
+  ret <2 x double> %0
+; CHECK: test5:
+; CHECK: mov
+; CHECK: mov
+; CHECK: cvtsi2sd
+}
+
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index 4955156..e775750 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
 ; CHECK: shift1b:
 ; CHECK: movd
-; CHECK-NEXT: psllq
+; CHECK: psllq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %shl = shl <2 x i64> %val, %1
@@ -38,7 +38,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 9a9b419..9496893 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
 ; CHECK: shift1b:
 ; CHECK: movd
-; CHECK-NEXT: psrlq
+; CHECK: psrlq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %lshr = lshr <2 x i64> %val, %1
@@ -37,7 +37,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: psrld
+; CHECK: psrld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -63,7 +63,7 @@ entry:
 ; CHECK: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
-; CHECK-NEXT: psrlw
+; CHECK: psrlw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 8e8a9aa..b2b48b9 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -28,7 +28,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -52,7 +52,7 @@ entry:
 ; CHECK: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
-; CHECK-NEXT: psraw
+; CHECK: psraw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index cb254ae..f6c311d 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -6,7 +6,7 @@ define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
 ; CHECK: shift5a:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %amt = load i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
@@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
 ; CHECK: shift5b:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %amt = load i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
@@ -34,7 +34,7 @@ define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift5c:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
   %shl = shl <4 x i32> %val, %shamt
@@ -47,7 +47,7 @@ define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift5d:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
   %shr = ashr <4 x i32> %val, %shamt
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index f55b184..d86042a 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -2,7 +2,6 @@
 ; CHECK: incl
 ; CHECK: incl
 ; CHECK: incl
-; CHECK: addl
 
 ; Widen a v3i16 to v8i16 to do a vector add
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 4330aae..ebdfea9 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,7 +1,14 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+
 ; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movd
+; CHECK: movl
+; CHECK: movlpd
+
+; Scheduler causes produce a different instruction order
+; ATOM: movl
+; ATOM: paddd
+; ATOM: movlpd
 
 ; bitcast a v4i16 to v2i32
 
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 5c695ea..3979ce4 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
 ; CHECK: pextrd
 ; CHECK: pextrd
 ; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 136578d..9086d3a 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,9 +1,8 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: movl
-; CHECK: movd
+; CHECK: movlpd
 
 ; bitcast a i64 to v2i32
-
 define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
 entry:
 	%conv = bitcast i64 %src to <2 x i32>
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index affd796..1158e04 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
 ; CHECK-NOT: cvtsi2ss
 
 ; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 4bcac58..8672742 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
 ; widen extract subvector
 
 define void @convert(<2 x double>* %dst.addr, <3 x double> %src)  {
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index 4aeec91..d543728 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,18 +1,12 @@
 ; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
 ; PR4891
 
 ; Both loads should happen before either store.
 
-; CHECK: movd  ({{.*}}), {{.*}}
-; CHECK: movd  ({{.*}}), {{.*}}
-; CHECK: movd  {{.*}}, ({{.*}})
-; CHECK: movd  {{.*}}, ({{.*}})
-
-; WIN64: movd  ({{.*}}), {{.*}}
-; WIN64: movd  ({{.*}}), {{.*}}
-; WIN64: movd  {{.*}}, ({{.*}})
-; WIN64: movd  {{.*}}, ({{.*}})
+; CHECK: movl  ({{.*}}), {{.*}}
+; CHECK: movl  ({{.*}}), {{.*}}
+; CHECK: movl  {{.*}}, ({{.*}})
+; CHECK: movl  {{.*}}, ({{.*}})
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index a961c6a..cc11e4c 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,12 +1,9 @@
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
-; Passing the same value in two registers creates a false interference that
-; only -join-physregs resolves. It could also be handled by a parallel copy.
-
 define i64 @foo(i64 %n, i64 %x) nounwind {
 entry:
 
@@ -31,19 +28,19 @@ entry:
 
   %buf1 = alloca i8, i64 %n, align 1
 
-; M64: leaq  15(%rcx), %rax
+; M64: leaq  15(%{{.*}}), %rax
 ; M64: andq  $-16, %rax
 ; M64: callq ___chkstk
 ; M64-NOT:   %rsp
 ; M64: movq  %rsp, %rax
 
-; W64: leaq  15(%rcx), %rax
+; W64: leaq  15(%{{.*}}), %rax
 ; W64: andq  $-16, %rax
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, %rax
 
-; EFI: leaq  15(%rcx), [[R1:%r.*]]
+; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
 ; EFI: andq  $-16, [[R1]]
 ; EFI: movq  %rsp, [[R64:%r.*]]
 ; EFI: subq  [[R1]], [[R64]]
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
index ec8dd8e..9a959e8 100644
--- a/test/CodeGen/X86/x86-64-arg.ll
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl	%edi, %eax}
+; RUN: llc < %s | grep "movl	%edi, %eax"
 ; The input value is already sign extended, don't re-extend it.
 ; This testcase corresponds to:
 ;   int test(short X) { return (int)X; }
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 79316f2..902c9d5 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | not grep rsp
-; RUN: llc < %s | grep cvttsd2siq
+; RUN: llc < %s -mcpu=nehalem | not grep rsp
+; RUN: llc < %s -mcpu=nehalem | grep cvttsd2siq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
index 46f6d33..46cd4f8 100644
--- a/test/CodeGen/X86/x86-64-pic-1.ll
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq	f@PLT} %t1
+; RUN: grep "callq	f@PLT" %t1
 
 define void @g() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index b6f82e2..3ec172b 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq	g@PLT} %t1
+; RUN: grep "callq	g@PLT" %t1
 
 @g = alias weak i32 ()* @f
 
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
index 4db331c..fd64beb 100644
--- a/test/CodeGen/X86/x86-64-pic-11.ll
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq	__fixunsxfti@PLT} %t1
+; RUN: grep "callq	__fixunsxfti@PLT" %t1
 
 define i128 @f(x86_fp80 %a) nounwind {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
index 1ce2de7..f3f7b1d 100644
--- a/test/CodeGen/X86/x86-64-pic-2.ll
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq	f} %t1
-; RUN: not grep {callq	f@PLT} %t1
+; RUN: grep "callq	f" %t1
+; RUN: not grep "callq	f@PLT" %t1
 
 define void @g() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index aa3c888..ba93378 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq	f} %t1
-; RUN: not grep {callq	f@PLT} %t1
+; RUN: grep "callq	f" %t1
+; RUN: not grep "callq	f@PLT" %t1
 
 define void @g() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index 90fc119..33b08c4 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movq	a@GOTPCREL(%rip),} %t1
+; RUN: grep "movq	a@GOTPCREL(%rip)," %t1
 
 @a = global i32 0
 
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 6369bde..234bc0d 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movl	a(%rip),} %t1
+; RUN: grep "movl	a(%rip)," %t1
 ; RUN: not grep GOTPCREL %t1
 
 @a = hidden global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index 6e19ad3..ae5b583 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movl	a(%rip),} %t1
+; RUN: grep "movl	a(%rip)," %t1
 ; RUN: not grep GOTPCREL %t1
 
 @a = internal global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 4d98ee6..de240a3 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movq	f@GOTPCREL(%rip),} %t1
+; RUN: grep "movq	f@GOTPCREL(%rip)," %t1
 
 define void ()* @g() nounwind {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
index d3b567c..db35c33 100644
--- a/test/CodeGen/X86/x86-64-pic-8.ll
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {leaq	f(%rip),} %t1
+; RUN: grep "leaq	f(%rip)," %t1
 ; RUN: not grep GOTPCREL %t1
 
 define void ()* @g() {
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index 0761031..6daea84 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {leaq	f(%rip),} %t1
+; RUN: grep "leaq	f(%rip)," %t1
 ; RUN: not grep GOTPCREL %t1
 
 define void ()* @g() nounwind {
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index a2521b0..8af782c 100644
--- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -875,37 +875,37 @@ define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
 }
 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
 
-define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczss
-  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ;
   ret <4 x float> %res
 }
-define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczss
-  %elem = load float* %a1
+  %elem = load float* %a0
   %vec = insertelement <4 x float> undef, float %elem, i32 0
-  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
 
-define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczsd
-  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ;
   ret <2 x double> %res
 }
-define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczsd
-  %elem = load double* %a1
+  %elem = load double* %a0
   %vec = insertelement <2 x double> undef, double %elem, i32 0
-  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
 
 define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
   ; CHECK: vfrczpd
@@ -967,3 +967,59 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
 }
 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
 
+define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK:vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK:vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index ddc4cab..996bfc4 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2  | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2  | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
 
 ; Though it is undefined, we want xor undef,undef to produce zero.
 define <4 x i32> @test1() nounwind {
@@ -31,7 +31,7 @@ entry:
 ; X64: test3:
 ; X64:	notl
 ; X64:	andl
-; X64:	shrl	%eax
+; X64:	shrl
 ; X64:	ret
 
 ; X32: test3:
diff --git a/test/CodeGen/XCore/mkmsk.ll b/test/CodeGen/XCore/mkmsk.ll
new file mode 100644
index 0000000..377612b
--- /dev/null
+++ b/test/CodeGen/XCore/mkmsk.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+define i32 @f(i32) nounwind {
+; CHECK: f:
+; CHECK: mkmsk r0, r0
+; CHECK-NEXT: retsp 0
+entry:
+  %1 = shl i32 1, %0
+  %2 = add i32 %1, -1
+  ret i32 %2
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
new file mode 100755
index 0000000..fe20c8e
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
new file mode 100755
index 0000000..ce4af7f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
new file mode 100755
index 0000000..7c17304
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
new file mode 100755
index 0000000..8848708
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
new file mode 100644
index 0000000..3be9aba
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -0,0 +1,111 @@
+; RUN: llc -O1 -mtriple=x86_64-apple-darwin < %s | FileCheck -check-prefix=DARWIN %s
+; RUN: llc -O1 -mtriple=x86_64-pc-linux-gnu < %s | FileCheck -check-prefix=LINUX %s
+; PR9493
+; Adapted from the original test case in r127757.
+; We use 'llc -O1' to induce variable 'x' to live in different locations.
+; We don't actually care where 'x' lives, or what exact optimizations get
+; used, as long as 'x' moves around we're fine.
+
+; // The variable 'x' lives in different locations, so it needs an entry in
+; // the .debug_loc table section, referenced by DW_AT_location.
+; // This ref is not relocatable on Darwin, and is relocatable elsewhere.
+; extern int g(int, int);
+; extern int a;
+; 
+; void f(void) {
+;   int x;
+;   a = g(0, 0);
+;   x = 1;
+;   while (x & 1) { x *= a; }
+;   a = g(x, 0);
+;   x = 2;
+;   while (x & 2) { x *= a; }
+;   a = g(0, x);
+; }
+
+; // The 'x' variable and its symbol reference location
+; DARWIN:      DW_TAG_variable
+; DARWIN-NEXT: ## DW_AT_name
+; DARWIN-NEXT: .long Lset{{[0-9]+}}
+; DARWIN-NEXT: ## DW_AT_decl_file
+; DARWIN-NEXT: ## DW_AT_decl_line
+; DARWIN-NEXT: ## DW_AT_type
+; DARWIN-NEXT: Lset{{[0-9]+}} = Ldebug_loc{{[0-9]+}}-Lsection_debug_loc ## DW_AT_location
+; DARWIN-NEXT: .long Lset{{[0-9]+}}
+
+; LINUX:      DW_TAG_variable
+; LINUX-NEXT: # DW_AT_name
+; LINUX-NEXT: # DW_AT_decl_file
+; LINUX-NEXT: # DW_AT_decl_line
+; LINUX-NEXT: # DW_AT_type
+; LINUX-NEXT: .long .Ldebug_loc{{[0-9]+}} # DW_AT_location
+
+
+; ModuleID = 'simple.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+
+@a = external global i32
+
+define void @f() nounwind {
+entry:
+  %call = tail call i32 @g(i32 0, i32 0) nounwind, !dbg !8
+  store i32 %call, i32* @a, align 4, !dbg !8, !tbaa !9
+  tail call void @llvm.dbg.value(metadata !12, i64 0, metadata !5), !dbg !13
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %x.017 = phi i32 [ 1, %entry ], [ %mul, %while.body ]
+  %mul = mul nsw i32 %call, %x.017, !dbg !14
+  %and = and i32 %mul, 1, !dbg !14
+  %tobool = icmp eq i32 %and, 0, !dbg !14
+  br i1 %tobool, label %while.end, label %while.body, !dbg !14
+
+while.end:                                        ; preds = %while.body
+  tail call void @llvm.dbg.value(metadata !{i32 %mul}, i64 0, metadata !5), !dbg !14
+  %call4 = tail call i32 @g(i32 %mul, i32 0) nounwind, !dbg !15
+  store i32 %call4, i32* @a, align 4, !dbg !15, !tbaa !9
+  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !5), !dbg !17
+  br label %while.body9
+
+while.body9:                                      ; preds = %while.end, %while.body9
+  %x.116 = phi i32 [ 2, %while.end ], [ %mul12, %while.body9 ]
+  %mul12 = mul nsw i32 %call4, %x.116, !dbg !18
+  %and7 = and i32 %mul12, 2, !dbg !18
+  %tobool8 = icmp eq i32 %and7, 0, !dbg !18
+  br i1 %tobool8, label %while.end13, label %while.body9, !dbg !18
+
+while.end13:                                      ; preds = %while.body9
+  tail call void @llvm.dbg.value(metadata !{i32 %mul12}, i64 0, metadata !5), !dbg !18
+  %call15 = tail call i32 @g(i32 0, i32 %mul12) nounwind, !dbg !19
+  store i32 %call15, i32* @a, align 4, !dbg !19, !tbaa !9
+  ret void, !dbg !20
+}
+
+declare i32 @g(i32, i32)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.f = !{!5}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !"clang version 3.0 (trunk)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 590080, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 589835, metadata !0, i32 4, i32 14, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 6, i32 3, metadata !6, null}
+!9 = metadata !{metadata !"int", metadata !10}
+!10 = metadata !{metadata !"omnipotent char", metadata !11}
+!11 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!12 = metadata !{i32 1}
+!13 = metadata !{i32 7, i32 3, metadata !6, null}
+!14 = metadata !{i32 8, i32 3, metadata !6, null}
+!15 = metadata !{i32 9, i32 3, metadata !6, null}
+!16 = metadata !{i32 2}
+!17 = metadata !{i32 10, i32 3, metadata !6, null}
+!18 = metadata !{i32 11, i32 3, metadata !6, null}
+!19 = metadata !{i32 12, i32 3, metadata !6, null}
+!20 = metadata !{i32 13, i32 1, metadata !6, null}
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
new file mode 100644
index 0000000..9e6c7ff
--- /dev/null
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -0,0 +1,42 @@
+; RUN: llc %s -mtriple=x86_64-pc-linux-gnu -O0 -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; If stack is realigned, we shouldn't describe locations of local
+; variables by giving offset from the frame pointer (%rbp):
+; push %rpb
+; mov  %rsp,%rbp
+; and  ALIGNMENT,%rsp ; (%rsp and %rbp are different now)
+; It's better to use offset from %rsp instead.
+
+; DW_AT_location of variable "x" shouldn't be equal to
+; (DW_OP_fbreg: .*): DW_OP_fbreg has code 0x91
+
+; CHECK: {{0x.* DW_TAG_variable}}
+; CHECK-NOT: {{DW_AT_location.*DW_FORM_block1.*0x.*91}}
+; CHECK: NULL
+
+define void @_Z3runv() nounwind uwtable {
+entry:
+  %x = alloca i32, align 32
+  call void @llvm.dbg.declare(metadata !{i32* %x}, metadata !9), !dbg !12
+  ret void, !dbg !13
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 2, i32 7, metadata !10, null}
+!13 = metadata !{i32 3, i32 1, metadata !10, null}
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
new file mode 100644
index 0000000..6eb715d
--- /dev/null
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -0,0 +1,45 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+@a = global i32 0, align 4
+@b = global i64 0, align 8
+@c = global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{metadata !3, metadata !8, metadata !12}
+!3 = metadata !{i32 786436, null, metadata !"A", metadata !4, i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!4 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ]
+!8 = metadata !{i32 786436, null, metadata !"B", metadata !4, i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!9 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ]
+!12 = metadata !{i32 786436, null, metadata !"C", metadata !4, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 0}
+!17 = metadata !{metadata !18}
+!18 = metadata !{metadata !19, metadata !20, metadata !21}
+!19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b} ; [ DW_TAG_variable ]
+!21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c} ; [ DW_TAG_variable ]
+
+; CHECK: DW_TAG_enumeration_type [3]
+; CHECK: DW_AT_type [DW_FORM_ref4]      (cu + 0x0026 => {0x00000026})
+; CHECK: DW_AT_enum_class [DW_FORM_flag]    (0x01)
+; CHECK: DW_AT_name [DW_FORM_strp]      ( .debug_str[{{.*}}] = "A")
+
+; CHECK: DW_TAG_enumeration_type [3] *
+; CHECK: DW_AT_type [DW_FORM_ref4]      (cu + 0x0057 => {0x00000057})
+; CHECK: DW_AT_enum_class [DW_FORM_flag]    (0x01)
+; CHECK: DW_AT_name [DW_FORM_strp]          ( .debug_str[{{.*}}] = "B")
+
+; CHECK: DW_TAG_enumeration_type [6]
+; CHECK-NOT: DW_AT_enum_class
+; CHECK: DW_AT_name [DW_FORM_strp]      ( .debug_str[{{.*}}] = "C")
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
new file mode 100644
index 0000000..c2dacea
--- /dev/null
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+@e = global i16 0, align 2
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157772) (llvm/trunk 157761)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !6, metadata !6, metadata !7} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786436, null, metadata !"E", metadata !4, i32 1, i64 16, i64 16, i32 0, i32 4, null, metadata !5, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!4 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 0}
+!6 = metadata !{metadata !5}
+!7 = metadata !{metadata !8}
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !4, i32 2, metadata !3, i32 0, i32 1, i16* @e} ; [ DW_TAG_variable ]
+
+; CHECK: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name
+; CHECK-NEXT: DW_AT_byte_size
+; CHECK-NEXT: DW_AT_declaration
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
new file mode 100644
index 0000000..c84b2e6
--- /dev/null
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -0,0 +1,89 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
+; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
+; DW_AT_location lists yet.
+; CHECK: DW_AT_location [DW_FORM_data4]                      (0x00000000)
+
+define void @testVLAwithSize(i32 %s) nounwind uwtable ssp {
+entry:
+  %s.addr = alloca i32, align 4
+  %saved_stack = alloca i8*
+  %i = alloca i32, align 4
+  store i32 %s, i32* %s.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %s.addr}, metadata !10), !dbg !11
+  %0 = load i32* %s.addr, align 4, !dbg !12
+  %1 = zext i32 %0 to i64, !dbg !12
+  %2 = call i8* @llvm.stacksave(), !dbg !12
+  store i8* %2, i8** %saved_stack, !dbg !12
+  %vla = alloca i32, i64 %1, align 16, !dbg !12
+  call void @llvm.dbg.declare(metadata !{i32* %vla}, metadata !14), !dbg !18
+  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !19), !dbg !20
+  store i32 0, i32* %i, align 4, !dbg !21
+  br label %for.cond, !dbg !21
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %3 = load i32* %i, align 4, !dbg !21
+  %4 = load i32* %s.addr, align 4, !dbg !21
+  %cmp = icmp slt i32 %3, %4, !dbg !21
+  br i1 %cmp, label %for.body, label %for.end, !dbg !21
+
+for.body:                                         ; preds = %for.cond
+  %5 = load i32* %i, align 4, !dbg !23
+  %6 = load i32* %i, align 4, !dbg !23
+  %mul = mul nsw i32 %5, %6, !dbg !23
+  %7 = load i32* %i, align 4, !dbg !23
+  %idxprom = sext i32 %7 to i64, !dbg !23
+  %arrayidx = getelementptr inbounds i32* %vla, i64 %idxprom, !dbg !23
+  store i32 %mul, i32* %arrayidx, align 4, !dbg !23
+  br label %for.inc, !dbg !25
+
+for.inc:                                          ; preds = %for.body
+  %8 = load i32* %i, align 4, !dbg !26
+  %inc = add nsw i32 %8, 1, !dbg !26
+  store i32 %inc, i32* %i, align 4, !dbg !26
+  br label %for.cond, !dbg !26
+
+for.end:                                          ; preds = %for.cond
+  %9 = load i8** %saved_stack, !dbg !27
+  call void @llvm.stackrestore(i8* %9), !dbg !27
+  ret void, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 1, i32 26, metadata !5, null}
+!12 = metadata !{i32 3, i32 13, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !5, i32 2, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!18 = metadata !{i32 3, i32 7, metadata !13, null}
+!19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 4, i32 7, metadata !13, null}
+!21 = metadata !{i32 5, i32 8, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !13, i32 5, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 6, i32 5, metadata !24, null}
+!24 = metadata !{i32 786443, metadata !22, i32 5, i32 27, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 7, i32 3, metadata !24, null}
+!26 = metadata !{i32 5, i32 22, metadata !22, null}
+!27 = metadata !{i32 8, i32 1, metadata !13, null}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
new file mode 100644
index 0000000..abb946d
--- /dev/null
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -0,0 +1,238 @@
+; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -o /dev/null
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.function = type { i8 }
+%class.BPLFunctionWriter = type { %struct.BPLModuleWriter* }
+%struct.BPLModuleWriter = type { i8 }
+%class.anon = type { i8 }
+%class.anon.0 = type { i8 }
+
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_" = alias internal void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_" = alias internal void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"
+
+define void @_ZN17BPLFunctionWriter9writeExprEv(%class.BPLFunctionWriter* %this) nounwind uwtable align 2 {
+entry:
+  %this.addr = alloca %class.BPLFunctionWriter*, align 8
+  %agg.tmp = alloca %class.function, align 1
+  %agg.tmp2 = alloca %class.anon, align 1
+  %agg.tmp4 = alloca %class.function, align 1
+  %agg.tmp5 = alloca %class.anon.0, align 1
+  store %class.BPLFunctionWriter* %this, %class.BPLFunctionWriter** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.BPLFunctionWriter** %this.addr}, metadata !133), !dbg !135
+  %this1 = load %class.BPLFunctionWriter** %this.addr
+  %MW = getelementptr inbounds %class.BPLFunctionWriter* %this1, i32 0, i32 0, !dbg !136
+  %0 = load %struct.BPLModuleWriter** %MW, align 8, !dbg !136
+  call void @"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"(%class.function* %agg.tmp), !dbg !136
+  call void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter* %0), !dbg !136
+  %MW3 = getelementptr inbounds %class.BPLFunctionWriter* %this1, i32 0, i32 0, !dbg !138
+  %1 = load %struct.BPLModuleWriter** %MW3, align 8, !dbg !138
+  call void @"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"(%class.function* %agg.tmp4), !dbg !138
+  call void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter* %1), !dbg !138
+  ret void, !dbg !139
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter*)
+
+define internal void @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"(%class.function* %this) unnamed_addr nounwind uwtable align 2 {
+entry:
+  %this.addr = alloca %class.function*, align 8
+  %__f = alloca %class.anon.0, align 1
+  store %class.function* %this, %class.function** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.function** %this.addr}, metadata !140), !dbg !142
+  call void @llvm.dbg.declare(metadata !{%class.anon.0* %__f}, metadata !143), !dbg !144
+  %this1 = load %class.function** %this.addr
+  call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_"(%class.anon.0* %__f), !dbg !145
+  ret void, !dbg !147
+}
+
+define internal void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_"(%class.anon.0*) nounwind uwtable align 2 {
+entry:
+  %.addr = alloca %class.anon.0*, align 8
+  store %class.anon.0* %0, %class.anon.0** %.addr, align 8
+  ret void, !dbg !148
+}
+
+define internal void @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"(%class.function* %this) unnamed_addr nounwind uwtable align 2 {
+entry:
+  %this.addr = alloca %class.function*, align 8
+  %__f = alloca %class.anon, align 1
+  store %class.function* %this, %class.function** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.function** %this.addr}, metadata !150), !dbg !151
+  call void @llvm.dbg.declare(metadata !{%class.anon* %__f}, metadata !152), !dbg !153
+  %this1 = load %class.function** %this.addr
+  call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_"(%class.anon* %__f), !dbg !154
+  ret void, !dbg !156
+}
+
+define internal void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_"(%class.anon*) nounwind uwtable align 2 {
+entry:
+  %.addr = alloca %class.anon*, align 8
+  store %class.anon* %0, %class.anon** %.addr, align 8
+  ret void, !dbg !157
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
+!5 = metadata !{i32 786478, i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786434, null, metadata !"BPLFunctionWriter", metadata !6, i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{metadata !12, metadata !103}
+!12 = metadata !{i32 786445, metadata !10, metadata !"MW", metadata !6, i32 16, i64 64, i64 64, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786434, null, metadata !"BPLModuleWriter", metadata !6, i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ]
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 786478, i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!18 = metadata !{null, metadata !19, metadata !20}
+!19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 786434, null, metadata !"function<void ()>", metadata !6, i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ]
+!21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92}
+!22 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!24 = metadata !{null, metadata !25, metadata !26}
+!25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
+!26 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ]
+!27 = metadata !{metadata !28, metadata !35, metadata !41}
+!28 = metadata !{i32 786478, i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !31}
+!31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
+!33 = metadata !{metadata !34}
+!34 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 786478, i32 0, metadata !26, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
+!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!37 = metadata !{null, metadata !38}
+!38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
+!39 = metadata !{metadata !40}
+!40 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!41 = metadata !{i32 786478, i32 0, metadata !26, metadata !"", metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
+!42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!43 = metadata !{null, metadata !38, metadata !44}
+!44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ]
+!45 = metadata !{metadata !46}
+!46 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!49 = metadata !{metadata !50}
+!50 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!51 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
+!52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!53 = metadata !{null, metadata !25, metadata !20}
+!54 = metadata !{metadata !55}
+!55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!56 = metadata !{metadata !57}
+!57 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!58 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
+!59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!60 = metadata !{null, metadata !25, metadata !61}
+!61 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ]
+!62 = metadata !{metadata !63, metadata !70, metadata !76}
+!63 = metadata !{i32 786478, i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
+!64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!65 = metadata !{null, metadata !66}
+!66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
+!67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
+!68 = metadata !{metadata !69}
+!69 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!70 = metadata !{i32 786478, i32 0, metadata !61, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
+!71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!72 = metadata !{null, metadata !73}
+!73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
+!74 = metadata !{metadata !75}
+!75 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!76 = metadata !{i32 786478, i32 0, metadata !61, metadata !"", metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
+!77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!78 = metadata !{null, metadata !73, metadata !79}
+!79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ]
+!80 = metadata !{metadata !81}
+!81 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!82 = metadata !{metadata !83}
+!83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!84 = metadata !{metadata !85}
+!85 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!86 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function", metadata !"function", metadata !"", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
+!87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!88 = metadata !{null, metadata !25, metadata !89}
+!89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ]
+!90 = metadata !{metadata !91}
+!91 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!92 = metadata !{i32 786478, i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !"", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
+!93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!94 = metadata !{null, metadata !25}
+!95 = metadata !{metadata !96}
+!96 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!97 = metadata !{metadata !98}
+!98 = metadata !{i32 786479, null, metadata !"T", metadata !99, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!99 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!100 = metadata !{null}
+!101 = metadata !{metadata !102}
+!102 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
+!103 = metadata !{i32 786478, i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
+!104 = metadata !{metadata !105}
+!105 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
+!106 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!107 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!109 = metadata !{null, metadata !110}
+!110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ]
+!111 = metadata !{metadata !112}
+!112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!113 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
+!114 = metadata !{i32 786434, null, metadata !"_Base_manager", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ]
+!115 = metadata !{metadata !116, metadata !113}
+!116 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
+!117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!118 = metadata !{null, metadata !119}
+!119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ]
+!120 = metadata !{metadata !121}
+!121 = metadata !{i32 786479, null, metadata !"_Tp", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!122 = metadata !{metadata !123}
+!123 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
+!124 = metadata !{metadata !125}
+!125 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
+!126 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!127 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!128 = metadata !{metadata !129}
+!129 = metadata !{metadata !130}
+!130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ]
+!131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
+!132 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!133 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777235, metadata !134, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!134 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!135 = metadata !{i32 19, i32 39, metadata !5, null}
+!136 = metadata !{i32 20, i32 17, metadata !137, null}
+!137 = metadata !{i32 786443, metadata !5, i32 19, i32 51, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!138 = metadata !{i32 23, i32 17, metadata !137, null}
+!139 = metadata !{i32 26, i32 15, metadata !137, null}
+!140 = metadata !{i32 786689, metadata !106, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!141 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!142 = metadata !{i32 8, i32 45, metadata !106, null}
+!143 = metadata !{i32 786689, metadata !106, metadata !"__f", metadata !6, i32 33554440, metadata !61, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!144 = metadata !{i32 8, i32 63, metadata !106, null}
+!145 = metadata !{i32 9, i32 9, metadata !146, null}
+!146 = metadata !{i32 786443, metadata !106, i32 8, i32 81, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!147 = metadata !{i32 10, i32 13, metadata !146, null}
+!148 = metadata !{i32 4, i32 5, metadata !149, null}
+!149 = metadata !{i32 786443, metadata !107, i32 3, i32 105, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!150 = metadata !{i32 786689, metadata !126, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!151 = metadata !{i32 8, i32 45, metadata !126, null}
+!152 = metadata !{i32 786689, metadata !126, metadata !"__f", metadata !6, i32 33554440, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!153 = metadata !{i32 8, i32 63, metadata !126, null}
+!154 = metadata !{i32 9, i32 9, metadata !155, null}
+!155 = metadata !{i32 786443, metadata !126, i32 8, i32 81, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!156 = metadata !{i32 10, i32 13, metadata !155, null}
+!157 = metadata !{i32 4, i32 5, metadata !158, null}
+!158 = metadata !{i32 786443, metadata !127, i32 3, i32 105, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
new file mode 100644
index 0000000..e820cb5
--- /dev/null
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -0,0 +1,28 @@
+; RUN: llc %s -o %t -filetype=obj -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; PR13303
+
+; Check that the prologue ends with is_stmt here.
+; CHECK: 0x0000000000000000 {{.*}} is_stmt
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"PR13303.c", metadata !"/home/probinson", metadata !"clang version 3.2 (trunk 160143)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 1, i32 14, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
new file mode 100644
index 0000000..e73869d
--- /dev/null
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj -O0
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: DW_TAG_rvalue_reference_type
+
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+define void @_Z3fooOi(i32* %i) uwtable ssp {
+entry:
+  %i.addr = alloca i32*, align 8
+  store i32* %i, i32** %i.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i32** %i.addr}, metadata !11), !dbg !12
+  %0 = load i32** %i.addr, align 8, !dbg !13
+  %1 = load i32* %0, align 4, !dbg !13
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1), !dbg !13
+  ret void, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 4, i32 17, metadata !5, null}
+!13 = metadata !{i32 6, i32 3, metadata !14, null}
+!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 7, i32 1, metadata !14, null}
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
new file mode 100644
index 0000000..de23dcd
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-test.test
@@ -0,0 +1,46 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64  \
+RUN:   --address=0x400589 --functions | FileCheck %s -check-prefix MAIN
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64  \
+RUN:   --address=0x400558 --functions | FileCheck %s -check-prefix FUNCTION
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
+RUN:   --address=0x4005b6 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
+RUN:   --address=0x4004b8 --functions | FileCheck %s -check-prefix MANY_CU_1
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
+RUN:   --address=0x4004c4 --functions | FileCheck %s -check-prefix MANY_CU_2
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN:   --address=0x580 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN:   --address=0x573 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN:   --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
+RUN:   --address=0x55c --functions \
+RUN:   | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
+
+MAIN: main
+MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10
+
+FUNCTION: _Z1fii
+FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11:18
+
+CTOR_WITH_SPEC: _ZN10DummyClassC1Ei
+CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4:30
+
+MANY_CU_1: a
+MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}a.cc:2:0
+
+MANY_CU_2: main
+MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}main.cc:4:0
+
+ABS_ORIGIN_1: C
+ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}def2.cc:4:0
+
+INCLUDE_TEST_1: _Z3do2v
+INCLUDE_TEST_1-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}decl2.h:1:0
+
+INCLUDE_TEST_2: _Z3do1v
+INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0
+
+MANY_SEQ_IN_LINE_TABLE: _Z1cv
+MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
new file mode 100644
index 0000000..ed4e7da
--- /dev/null
+++ b/test/DebugInfo/inlined-vars.ll
@@ -0,0 +1,57 @@
+; RUN: llc -O0 < %s | FileCheck %s -check-prefix ARGUMENT
+; RUN: llc -O0 < %s | FileCheck %s -check-prefix VARIABLE
+; PR 13202
+
+define i32 @main() uwtable {
+entry:
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !21
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !22), !dbg !23
+  tail call void @smth(i32 0), !dbg !24
+  tail call void @smth(i32 0), !dbg !25
+  ret i32 0, !dbg !19
+}
+
+declare void @smth(i32)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", metadata !"clang version 3.2 (trunk 159419)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !10}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !9, metadata !9}
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+
+; Two DW_TAG_formal_parameter: one abstract and one inlined.
+; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+
+!16 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+
+; Two DW_TAG_variable: one abstract and one inlined.
+; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
+; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
+; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
+
+!17 = metadata !{i32 786443, metadata !10, i32 3, i32 35, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, metadata !19} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 11, i32 10, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !5, i32 10, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 3, i32 25, metadata !10, metadata !19}
+!22 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 4, i32 16, metadata !17, metadata !19}
+!24 = metadata !{i32 5, i32 3, metadata !17, metadata !19}
+!25 = metadata !{i32 6, i32 3, metadata !17, metadata !19}
diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg
index 19eebc0..00bd9b8 100644
--- a/test/DebugInfo/lit.local.cfg
+++ b/test/DebugInfo/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
index 3193791..396ae85 100644
--- a/test/DebugInfo/printdbginfo2.ll
+++ b/test/DebugInfo/printdbginfo2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s
+; RUN: opt < %s -print-dbginfo -disable-output 2>&1 | FileCheck %s
 ;  grep {%b is variable b of type x declared at x.c:7} %t1
 ;  grep {%2 is variable b of type x declared at x.c:7} %t1
 ;  grep {@c.1442 is variable c of type int declared at x.c:4} %t1
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
new file mode 100644
index 0000000..46273d3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -0,0 +1,37 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
+
+declare i32 @puts(i8*)
+
+define void @getoptions(i32* %argc) {
+bb0:
+	ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc, i8** %argv) {
+bb0:
+	call i32 (i8*, ...)* @printf( i8* getelementptr ([10 x i8]* @.LC0, i64 0, i64 0), i32 %argc )		; <i32>:0 [#uses=0]
+	%cast224 = bitcast i8** %argv to i8*		; <i8*> [#uses=1]
+	%local = alloca i8*		; <i8**> [#uses=3]
+	store i8* %cast224, i8** %local
+	%cond226 = icmp sle i32 %argc, 0		; <i1> [#uses=1]
+	br i1 %cond226, label %bb3, label %bb2
+bb2:		; preds = %bb2, %bb0
+	%cann-indvar = phi i32 [ 0, %bb0 ], [ %add1-indvar, %bb2 ]		; <i32> [#uses=2]
+	%add1-indvar = add i32 %cann-indvar, 1		; <i32> [#uses=2]
+	%cann-indvar-idxcast = sext i32 %cann-indvar to i64		; <i64> [#uses=1]
+	%CT = bitcast i8** %local to i8***		; <i8***> [#uses=1]
+	%reg115 = load i8*** %CT		; <i8**> [#uses=1]
+	%cast235 = getelementptr i8** %reg115, i64 %cann-indvar-idxcast		; <i8**> [#uses=1]
+	%reg117 = load i8** %cast235		; <i8*> [#uses=1]
+	%reg236 = call i32 @puts( i8* %reg117 )		; <i32> [#uses=0]
+	%cond239 = icmp slt i32 %add1-indvar, %argc		; <i1> [#uses=1]
+	br i1 %cond239, label %bb2, label %bb3
+bb3:		; preds = %bb2, %bb0
+	%cast243 = bitcast i8** %local to i32*		; <i32*> [#uses=1]
+	call void @getoptions( i32* %cast243 )
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
new file mode 100644
index 0000000..88bfbb3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
@@ -0,0 +1,13 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @foo(i32 %X, i32 %Y, double %A) {
+	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
+	%cast110 = zext i1 %cond212 to i32		; <i32> [#uses=1]
+	ret i32 %cast110
+}
+
+define i32 @main() {
+	%reg212 = call i32 @foo( i32 0, i32 1, double 1.000000e+00 )		; <i32> [#uses=1]
+	ret i32 %reg212
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
new file mode 100644
index 0000000..d5f860d
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
@@ -0,0 +1,20 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
+	ret i32 0
+}
+
+define internal i32 @mylog(i32 %num) {
+bb0:
+	br label %bb2
+bb2:		; preds = %bb2, %bb0
+	%reg112 = phi i32 [ 10, %bb2 ], [ 1, %bb0 ]		; <i32> [#uses=1]
+	%cann-indvar = phi i32 [ %cann-indvar, %bb2 ], [ 0, %bb0 ]		; <i32> [#uses=1]
+	%reg114 = add i32 %reg112, 1		; <i32> [#uses=2]
+	%cond222 = icmp slt i32 %reg114, %num		; <i1> [#uses=1]
+	br i1 %cond222, label %bb2, label %bb3
+bb3:		; preds = %bb2
+	ret i32 %reg114
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
new file mode 100644
index 0000000..721f2e8
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%X = phi i32 [ 0, %0 ], [ 1, %Loop ]		; <i32> [#uses=1]
+	br i1 true, label %Out, label %Loop
+Out:		; preds = %Loop
+	ret i32 %X
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
new file mode 100644
index 0000000..d17df99
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; We were accidentally inverting the signedness of right shifts.  Whoops.
+
+define i32 @main() {
+	%X = ashr i32 -1, 16		; <i32> [#uses=1]
+	%Y = ashr i32 %X, 16		; <i32> [#uses=1]
+	%Z = add i32 %Y, 1		; <i32> [#uses=1]
+	ret i32 %Z
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
new file mode 100644
index 0000000..e55cb06
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
@@ -0,0 +1,10 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
+	%Y = fsub double 0.000000e+00, 1.000000e+00		; <double> [#uses=2]
+	%Z = fcmp oeq double %X, %Y		; <i1> [#uses=0]
+	fadd double %Y, 0.000000e+00		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
new file mode 100644
index 0000000..663dc40
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @bar(i8* %X) {
+        ; pointer should be 4 byte aligned!
+	%P = alloca double		; <double*> [#uses=1]
+	%R = ptrtoint double* %P to i32		; <i32> [#uses=1]
+	%A = and i32 %R, 3		; <i32> [#uses=1]
+	ret i32 %A
+}
+
+define i32 @main() {
+	%SP = alloca i8		; <i8*> [#uses=1]
+	%X = add i32 0, 0		; <i32> [#uses=1]
+	alloca i8, i32 %X		; <i8*>:1 [#uses=0]
+	call i32 @bar( i8* %SP )		; <i32>:2 [#uses=1]
+	ret i32 %2
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
new file mode 100644
index 0000000..e95294b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -0,0 +1,19 @@
+; This testcase should return with an exit code of 1.
+;
+; RUN: not %lli -use-mcjit %s
+
+@test = global i64 0		; <i64*> [#uses=1]
+
+define internal i64 @test.upgrd.1() {
+	%tmp.0 = load i64* @test		; <i64> [#uses=1]
+	%tmp.1 = add i64 %tmp.0, 1		; <i64> [#uses=1]
+	ret i64 %tmp.1
+}
+
+define i32 @main() {
+	%L = call i64 @test.upgrd.1( )		; <i64> [#uses=1]
+	%I = trunc i64 %L to i32		; <i32> [#uses=1]
+	ret i32 %I
+}
+
+
diff --git a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
new file mode 100644
index 0000000..a237194
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s test
+
+declare i32 @puts(i8*)
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+	%tmp.5 = getelementptr i8** %argv.1, i64 1		; <i8**> [#uses=1]
+	%tmp.6 = load i8** %tmp.5		; <i8*> [#uses=1]
+	%tmp.0 = call i32 @puts( i8* %tmp.6 )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
new file mode 100644
index 0000000..70464a3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
@@ -0,0 +1,15 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	br label %endif
+then:		; No predecessors!
+	br label %endif
+endif:		; preds = %then, %entry
+	%x = phi i32 [ 4, %entry ], [ 27, %then ]		; <i32> [#uses=0]
+	%result = phi i32 [ 32, %then ], [ 0, %entry ]		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
new file mode 100644
index 0000000..58d423f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
@@ -0,0 +1,19 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; Testcase distilled from 256.bzip2.
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	br label %loopentry.0
+loopentry.0:		; preds = %loopentry.0, %entry
+	%h.0 = phi i32 [ %tmp.2, %loopentry.0 ], [ -1, %entry ]		; <i32> [#uses=1]
+	%tmp.2 = add i32 %h.0, 1		; <i32> [#uses=3]
+	%tmp.4 = icmp ne i32 %tmp.2, 0		; <i1> [#uses=1]
+	br i1 %tmp.4, label %loopentry.0, label %loopentry.1
+loopentry.1:		; preds = %loopentry.0
+	%h.1 = phi i32 [ %tmp.2, %loopentry.0 ]		; <i32> [#uses=1]
+	ret i32 %h.1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
new file mode 100644
index 0000000..a22fe07
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; Testcase distilled from 256.bzip2.
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	%X = add i32 1, -1		; <i32> [#uses=3]
+	br label %Next
+Next:		; preds = %entry
+	%A = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
+	%B = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
+	%C = phi i32 [ %X, %entry ]		; <i32> [#uses=1]
+	ret i32 %C
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
new file mode 100644
index 0000000..b3c6d8a
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This testcase failed to work because two variable sized allocas confused the
+; local register allocator.
+
+define i32 @main(i32 %X) {
+	%A = alloca i32, i32 %X		; <i32*> [#uses=0]
+	%B = alloca float, i32 %X		; <float*> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
new file mode 100644
index 0000000..bd32f30
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+;
+; Regression Test: EnvironmentTest.ll
+;
+; Description:
+;	This is a regression test that verifies that the JIT passes the
+;	environment to the main() function.
+;
+
+
+declare i32 @strlen(i8*)
+
+define i32 @main(i32 %argc.1, i8** %argv.1, i8** %envp.1) {
+	%tmp.2 = load i8** %envp.1		; <i8*> [#uses=1]
+	%tmp.3 = call i32 @strlen( i8* %tmp.2 )		; <i32> [#uses=1]
+	%T = icmp eq i32 %tmp.3, 0		; <i1> [#uses=1]
+	%R = zext i1 %T to i32		; <i32> [#uses=1]
+	ret i32 %R
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
new file mode 100644
index 0000000..1959534
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This testcase exposes a bug in the local register allocator where it runs out
+; of registers (due to too many overlapping live ranges), but then attempts to
+; use the ESP register (which is not allocatable) to hold a value.
+
+define i32 @main(i32 %A) {
+        ; ESP gets used again...
+	%Ap2 = alloca i32, i32 %A		; <i32*> [#uses=11]
+	; Produce lots of overlapping live ranges
+        %B = add i32 %A, 1		; <i32> [#uses=1]
+	%C = add i32 %A, 2		; <i32> [#uses=1]
+	%D = add i32 %A, 3		; <i32> [#uses=1]
+	%E = add i32 %A, 4		; <i32> [#uses=1]
+	%F = add i32 %A, 5		; <i32> [#uses=1]
+	%G = add i32 %A, 6		; <i32> [#uses=1]
+	%H = add i32 %A, 7		; <i32> [#uses=1]
+	%I = add i32 %A, 8		; <i32> [#uses=1]
+	%J = add i32 %A, 9		; <i32> [#uses=1]
+	%K = add i32 %A, 10		; <i32> [#uses=1]
+        ; Uses of all of the values
+	store i32 %A, i32* %Ap2
+	store i32 %B, i32* %Ap2
+	store i32 %C, i32* %Ap2
+	store i32 %D, i32* %Ap2
+	store i32 %E, i32* %Ap2
+	store i32 %F, i32* %Ap2
+	store i32 %G, i32* %Ap2
+	store i32 %H, i32* %Ap2
+	store i32 %I, i32* %Ap2
+	store i32 %J, i32* %Ap2
+	store i32 %K, i32* %Ap2
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
new file mode 100644
index 0000000..1f8343f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -0,0 +1,23 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@A = global i32 0		; <i32*> [#uses=1]
+
+define i32 @main() {
+	%Ret = call i32 @test( i1 true, i32 0 )		; <i32> [#uses=1]
+	ret i32 %Ret
+}
+
+define i32 @test(i1 %c, i32 %A) {
+	br i1 %c, label %Taken1, label %NotTaken
+Cont:		; preds = %Taken1, %NotTaken
+	%V = phi i32 [ 0, %NotTaken ], [ sub (i32 ptrtoint (i32* @A to i32), i32 1234), %Taken1 ]		; <i32> [#uses=0]
+	ret i32 0
+NotTaken:		; preds = %0
+	br label %Cont
+Taken1:		; preds = %0
+	%B = icmp eq i32 %A, 0		; <i1> [#uses=1]
+	br i1 %B, label %Cont, label %ExitError
+ExitError:		; preds = %Taken1
+	ret i32 12
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
new file mode 100644
index 0000000..79a7d58
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
@@ -0,0 +1,22 @@
+; PR672
+; RUN: %lli -use-mcjit %s
+; XFAIL: mcjit-ia32
+
+define i32 @main() {
+	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
+	%res = tail call fastcc i32 @check_tail( i32 10, i32* %f, i32 10 )		; <i32> [#uses=1]
+	ret i32 %res
+}
+
+define fastcc i32 @check_tail(i32 %x, i32* %f, i32 %g) {
+	%tmp1 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %tmp1, label %if-then, label %if-else
+if-then:		; preds = %0
+	%fun_ptr = bitcast i32* %f to i32 (i32, i32*, i32)*		; <i32 (i32, i32*, i32)*> [#uses=1]
+	%arg1 = add i32 %x, -1		; <i32> [#uses=1]
+	%res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32* %f, i32 %g )		; <i32> [#uses=1]
+	ret i32 %res
+if-else:		; preds = %0
+	ret i32 %x
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
new file mode 100644
index 0000000..52cef4d
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -0,0 +1,19 @@
+; RUN: %lli -use-mcjit -force-interpreter %s
+; PR1836
+
+define i32 @main() {
+entry:
+    %retval = alloca i32        ; <i32*> [#uses=2]
+    %tmp = alloca i32       ; <i32*> [#uses=2]
+    %x = alloca i75, align 16       ; <i75*> [#uses=1]
+    %"alloca point" = bitcast i32 0 to i32      ; <i32> [#uses=0]
+    store i75 999, i75* %x, align 16
+    store i32 0, i32* %tmp, align 4
+    %tmp1 = load i32* %tmp, align 4     ; <i32> [#uses=1]
+    store i32 %tmp1, i32* %retval, align 4
+    br label %return
+
+return:     ; preds = %entry
+    %retval2 = load i32* %retval        ; <i32> [#uses=1]
+    ret i32 %retval2
+}
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
new file mode 100644
index 0000000..a6e917f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -0,0 +1,59 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s | grep 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+@.str = internal constant [10 x i8] c"MSB = %d\0A\00"		; <[10 x i8]*> [#uses=1]
+
+define i65 @foo(i65 %x) {
+entry:
+	%x_addr = alloca i65		; <i65*> [#uses=2]
+	%retval = alloca i65		; <i65*> [#uses=2]
+	%tmp = alloca i65		; <i65*> [#uses=2]
+	%"alloca point" = bitcast i65 0 to i65		; <i65> [#uses=0]
+	store i65 %x, i65* %x_addr
+	%tmp1 = load i65* %x_addr, align 4		; <i65> [#uses=1]
+	%tmp2 = ashr i65 %tmp1, 65		; <i65> [#uses=1]
+	store i65 %tmp2, i65* %tmp, align 4
+	%tmp3 = load i65* %tmp, align 4		; <i65> [#uses=1]
+	store i65 %tmp3, i65* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval4 = load i65* %retval		; <i65> [#uses=1]
+	ret i65 %retval4
+}
+
+define i32 @main() {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=1]
+	%iftmp.0 = alloca i32		; <i32*> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = call i65 @foo( i65 -9 )		; <i65> [#uses=1]
+	%tmp1 = lshr i65 %tmp, 64		; <i65> [#uses=1]
+	%tmp2 = xor i65 %tmp1, 1		; <i65> [#uses=1]
+	%tmp3 = and i65 %tmp2, 1		; <i65> [#uses=1]
+	%tmp34 = trunc i65 %tmp3 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true:		; preds = %entry
+	store i32 0, i32* %iftmp.0, align 4
+	br label %cond_next
+
+cond_false:		; preds = %entry
+	store i32 1, i32* %iftmp.0, align 4
+	br label %cond_next
+
+cond_next:		; preds = %cond_false, %cond_true
+	%tmp5 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp6 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
+	%tmp7 = call i32 (i8*, ...)* @printf( i8* noalias  %tmp5, i32 %tmp6 ) nounwind 		; <i32> [#uses=0]
+	br label %return
+
+return:		; preds = %cond_next
+    store i32 0, i32* %retval, align 4
+	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval8
+}
+
+declare i32 @printf(i8* noalias , ...) nounwind 
diff --git a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
new file mode 100644
index 0000000..524a724
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
@@ -0,0 +1,8 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s
+
+define i32 @main() {
+       %a = add i32 0, undef
+       %b = fadd float 0.0, undef
+       %c = fadd double 0.0, undef
+       ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
new file mode 100644
index 0000000..9da908f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -0,0 +1,20 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s | grep 40091eb8
+;
+define i32 @test(double %x) {
+entry:
+	%x46.i = bitcast double %x to i64	
+	%tmp343.i = lshr i64 %x46.i, 32	
+	%tmp344.i = trunc i64 %tmp343.i to i32
+        ret i32 %tmp344.i
+}
+
+define i32 @main()
+{
+       %res = call i32 @test(double 3.14)
+       %ptr = getelementptr [4 x i8]* @format, i32 0, i32 0
+       call i32 (i8*,...)* @printf(i8* %ptr, i32 %res)
+       ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+@format = internal constant [4 x i8] c"%x\0A\00"
diff --git a/test/ExecutionEngine/MCJIT/hello.ll b/test/ExecutionEngine/MCJIT/hello.ll
new file mode 100644
index 0000000..a52b6d4
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/hello.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
+
+declare i32 @puts(i8*)
+
+define i32 @main() {
+	%reg210 = call i32 @puts( i8* getelementptr ([12 x i8]* @.LC0, i64 0, i64 0) )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
new file mode 100644
index 0000000..670a6dd
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@X = global i32 7		; <i32*> [#uses=0]
+@msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
+
+declare void @printf([13 x i8]*, ...)
+
+define void @bar() {
+	call void ([13 x i8]*, ...)* @printf( [13 x i8]* @msg )
+	ret void
+}
+
+define i32 @main() {
+	call void @bar( )
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
new file mode 100644
index 0000000..2980ce7
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -0,0 +1,20 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets):
+    config.unsupported = False
+else:
+    config.unsupported = True
+
+if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips']:
+    config.unsupported = True
+
+if root.host_os in ['Win32', 'Cygwin', 'MingW', 'Windows', 'Darwin']:
+    config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/simplesttest.ll b/test/ExecutionEngine/MCJIT/simplesttest.ll
new file mode 100644
index 0000000..a6688c2
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simplesttest.ll
@@ -0,0 +1,6 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/simpletest.ll b/test/ExecutionEngine/MCJIT/simpletest.ll
new file mode 100644
index 0000000..4562aa6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simpletest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @bar() {
+	ret i32 0
+}
+
+define i32 @main() {
+	%r = call i32 @bar( )		; <i32> [#uses=1]
+	ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/stubs.ll b/test/ExecutionEngine/MCJIT/stubs.ll
new file mode 100644
index 0000000..b285b0e
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/stubs.ll
@@ -0,0 +1,35 @@
+; RUN: %lli -use-mcjit -disable-lazy-compilation=false %s
+
+define i32 @main() nounwind {
+entry:
+	call void @lazily_compiled_address_is_consistent()
+	ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+	store i1 ()* @test, i1 ()** @funcPtr
+	%pass = tail call i1 @test()		; <i32> [#uses=1]
+	br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+	ret void
+fail_block:
+	call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+	call void @exit(i32 1)
+	unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+	%tmp = load i1 ()** @funcPtr
+	%eq = icmp eq i1 ()* %tmp, @test
+	ret i1 %eq
+}
+
+declare i32 @puts(i8*) noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/test-arith.ll b/test/ExecutionEngine/MCJIT/test-arith.ll
new file mode 100644
index 0000000..3177760
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-arith.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%A = add i8 0, 12		; <i8> [#uses=1]
+	%B = sub i8 %A, 1		; <i8> [#uses=2]
+	%C = mul i8 %B, %B		; <i8> [#uses=2]
+	%D = sdiv i8 %C, %C		; <i8> [#uses=2]
+	%E = srem i8 %D, %D		; <i8> [#uses=0]
+	%F = udiv i8 5, 6		; <i8> [#uses=0]
+	%G = urem i8 6, 5		; <i8> [#uses=0]
+	%A.upgrd.1 = add i16 0, 12		; <i16> [#uses=1]
+	%B.upgrd.2 = sub i16 %A.upgrd.1, 1		; <i16> [#uses=2]
+	%C.upgrd.3 = mul i16 %B.upgrd.2, %B.upgrd.2		; <i16> [#uses=2]
+	%D.upgrd.4 = sdiv i16 %C.upgrd.3, %C.upgrd.3		; <i16> [#uses=2]
+	%E.upgrd.5 = srem i16 %D.upgrd.4, %D.upgrd.4		; <i16> [#uses=0]
+	%F.upgrd.6 = udiv i16 5, 6		; <i16> [#uses=0]
+	%G.upgrd.7 = urem i32 6, 5		; <i32> [#uses=0]
+	%A.upgrd.8 = add i32 0, 12		; <i32> [#uses=1]
+	%B.upgrd.9 = sub i32 %A.upgrd.8, 1		; <i32> [#uses=2]
+	%C.upgrd.10 = mul i32 %B.upgrd.9, %B.upgrd.9		; <i32> [#uses=2]
+	%D.upgrd.11 = sdiv i32 %C.upgrd.10, %C.upgrd.10		; <i32> [#uses=2]
+	%E.upgrd.12 = srem i32 %D.upgrd.11, %D.upgrd.11		; <i32> [#uses=0]
+	%F.upgrd.13 = udiv i32 5, 6		; <i32> [#uses=0]
+	%G1 = urem i32 6, 5		; <i32> [#uses=0]
+	%A.upgrd.14 = add i64 0, 12		; <i64> [#uses=1]
+	%B.upgrd.15 = sub i64 %A.upgrd.14, 1		; <i64> [#uses=2]
+	%C.upgrd.16 = mul i64 %B.upgrd.15, %B.upgrd.15		; <i64> [#uses=2]
+	%D.upgrd.17 = sdiv i64 %C.upgrd.16, %C.upgrd.16		; <i64> [#uses=2]
+	%E.upgrd.18 = srem i64 %D.upgrd.17, %D.upgrd.17		; <i64> [#uses=0]
+	%F.upgrd.19 = udiv i64 5, 6		; <i64> [#uses=0]
+	%G.upgrd.20 = urem i64 6, 5		; <i64> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-branch.ll b/test/ExecutionEngine/MCJIT/test-branch.ll
new file mode 100644
index 0000000..702c110
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-branch.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test unconditional branch
+define i32 @main() {
+	br label %Test
+Test:		; preds = %Test, %0
+	%X = icmp eq i32 0, 4		; <i1> [#uses=1]
+	br i1 %X, label %Test, label %Label
+Label:		; preds = %Test
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
new file mode 100644
index 0000000..6f28405
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
@@ -0,0 +1,14 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @_Z14func_exit_codev() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @_Z14func_exit_codev()
+  ret i32 %call
+}
diff --git a/test/ExecutionEngine/MCJIT/test-call.ll b/test/ExecutionEngine/MCJIT/test-call.ll
new file mode 100644
index 0000000..7a244ee
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-call.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+declare void @exit(i32)
+
+define i32 @test(i8 %C, i16 %S) {
+	%X = trunc i16 %S to i8		; <i8> [#uses=1]
+	%Y = zext i8 %X to i32		; <i32> [#uses=1]
+	ret i32 %Y
+}
+
+define void @FP(void (i32)* %F) {
+	%X = call i32 @test( i8 123, i16 1024 )		; <i32> [#uses=1]
+	call void %F( i32 %X )
+	ret void
+}
+
+define i32 @main() {
+	call void @FP( void (i32)* @exit )
+	ret i32 1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-cast.ll b/test/ExecutionEngine/MCJIT/test-cast.ll
new file mode 100644
index 0000000..75e7d1b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-cast.ll
@@ -0,0 +1,109 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @foo() {
+	ret i32 0
+}
+
+define i32 @main() {
+	icmp ne i1 true, false		; <i1>:1 [#uses=0]
+	zext i1 true to i8		; <i8>:2 [#uses=0]
+	zext i1 true to i8		; <i8>:3 [#uses=0]
+	zext i1 true to i16		; <i16>:4 [#uses=0]
+	zext i1 true to i16		; <i16>:5 [#uses=0]
+	zext i1 true to i32		; <i32>:6 [#uses=0]
+	zext i1 true to i32		; <i32>:7 [#uses=0]
+	zext i1 true to i64		; <i64>:8 [#uses=0]
+	zext i1 true to i64		; <i64>:9 [#uses=0]
+	uitofp i1 true to float		; <float>:10 [#uses=0]
+	uitofp i1 true to double		; <double>:11 [#uses=0]
+	icmp ne i8 0, 0		; <i1>:12 [#uses=0]
+	icmp ne i8 1, 0		; <i1>:13 [#uses=0]
+	bitcast i8 0 to i8		; <i8>:14 [#uses=0]
+	bitcast i8 -1 to i8		; <i8>:15 [#uses=0]
+	sext i8 4 to i16		; <i16>:16 [#uses=0]
+	sext i8 4 to i16		; <i16>:17 [#uses=0]
+	sext i8 4 to i64		; <i64>:18 [#uses=0]
+	sext i8 4 to i64		; <i64>:19 [#uses=0]
+	sitofp i8 4 to float		; <float>:20 [#uses=0]
+	sitofp i8 4 to double		; <double>:21 [#uses=0]
+	icmp ne i8 0, 0		; <i1>:22 [#uses=0]
+	icmp ne i8 1, 0		; <i1>:23 [#uses=0]
+	bitcast i8 0 to i8		; <i8>:24 [#uses=0]
+	bitcast i8 1 to i8		; <i8>:25 [#uses=0]
+	zext i8 4 to i16		; <i16>:26 [#uses=0]
+	zext i8 4 to i16		; <i16>:27 [#uses=0]
+	zext i8 4 to i64		; <i64>:28 [#uses=0]
+	zext i8 4 to i64		; <i64>:29 [#uses=0]
+	uitofp i8 0 to float		; <float>:30 [#uses=0]
+	uitofp i8 0 to double		; <double>:31 [#uses=0]
+	icmp ne i16 1, 0		; <i1>:32 [#uses=0]
+	trunc i16 -1 to i8		; <i8>:33 [#uses=0]
+	trunc i16 255 to i8		; <i8>:34 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:35 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:36 [#uses=0]
+	sext i16 0 to i64		; <i64>:37 [#uses=0]
+	sext i16 0 to i64		; <i64>:38 [#uses=0]
+	sitofp i16 0 to float		; <float>:39 [#uses=0]
+	sitofp i16 0 to double		; <double>:40 [#uses=0]
+	icmp ne i16 1, 0		; <i1>:41 [#uses=0]
+	trunc i16 1 to i8		; <i8>:42 [#uses=0]
+	trunc i16 255 to i8		; <i8>:43 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:44 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:45 [#uses=0]
+	zext i16 0 to i64		; <i64>:46 [#uses=0]
+	zext i16 0 to i64		; <i64>:47 [#uses=0]
+	uitofp i16 0 to float		; <float>:48 [#uses=0]
+	uitofp i16 0 to double		; <double>:49 [#uses=0]
+	icmp ne i32 6, 0		; <i1>:50 [#uses=0]
+	trunc i32 -6 to i8		; <i8>:51 [#uses=0]
+	trunc i32 6 to i8		; <i8>:52 [#uses=0]
+	trunc i32 6 to i16		; <i16>:53 [#uses=0]
+	bitcast i32 0 to i32		; <i32>:54 [#uses=0]
+	sext i32 0 to i64		; <i64>:55 [#uses=0]
+	sext i32 0 to i64		; <i64>:56 [#uses=0]
+	sitofp i32 0 to float		; <float>:57 [#uses=0]
+	sitofp i32 0 to double		; <double>:58 [#uses=0]
+	icmp ne i32 6, 0		; <i1>:59 [#uses=0]
+	trunc i32 7 to i8		; <i8>:60 [#uses=0]
+	trunc i32 8 to i8		; <i8>:61 [#uses=0]
+	trunc i32 9 to i16		; <i16>:62 [#uses=0]
+	bitcast i32 10 to i32		; <i32>:63 [#uses=0]
+	zext i32 0 to i64		; <i64>:64 [#uses=0]
+	zext i32 0 to i64		; <i64>:65 [#uses=0]
+	uitofp i32 0 to float		; <float>:66 [#uses=0]
+	uitofp i32 0 to double		; <double>:67 [#uses=0]
+	icmp ne i64 0, 0		; <i1>:68 [#uses=0]
+	trunc i64 0 to i8		; <i8>:69 [#uses=0]
+	trunc i64 0 to i8		; <i8>:70 [#uses=0]
+	trunc i64 0 to i16		; <i16>:71 [#uses=0]
+	trunc i64 0 to i16		; <i16>:72 [#uses=0]
+	trunc i64 0 to i32		; <i32>:73 [#uses=0]
+	trunc i64 0 to i32		; <i32>:74 [#uses=0]
+	bitcast i64 0 to i64		; <i64>:75 [#uses=0]
+	bitcast i64 0 to i64		; <i64>:76 [#uses=0]
+	sitofp i64 0 to float		; <float>:77 [#uses=0]
+	sitofp i64 0 to double		; <double>:78 [#uses=0]
+	icmp ne i64 1, 0		; <i1>:79 [#uses=0]
+	trunc i64 1 to i8		; <i8>:80 [#uses=0]
+	trunc i64 1 to i8		; <i8>:81 [#uses=0]
+	trunc i64 1 to i16		; <i16>:82 [#uses=0]
+	trunc i64 1 to i16		; <i16>:83 [#uses=0]
+	trunc i64 1 to i32		; <i32>:84 [#uses=0]
+	trunc i64 1 to i32		; <i32>:85 [#uses=0]
+	bitcast i64 1 to i64		; <i64>:86 [#uses=0]
+	bitcast i64 1 to i64		; <i64>:87 [#uses=0]
+	uitofp i64 1 to float		; <float>:88 [#uses=0]
+	uitofp i64 0 to double		; <double>:89 [#uses=0]
+	bitcast float 0.000000e+00 to float		; <float>:90 [#uses=0]
+	fpext float 0.000000e+00 to double		; <double>:91 [#uses=0]
+	fptosi double 0.000000e+00 to i8		; <i8>:92 [#uses=0]
+	fptoui double 0.000000e+00 to i8		; <i8>:93 [#uses=0]
+	fptosi double 0.000000e+00 to i16		; <i16>:94 [#uses=0]
+	fptoui double 0.000000e+00 to i16		; <i16>:95 [#uses=0]
+	fptosi double 0.000000e+00 to i32		; <i32>:96 [#uses=0]
+	fptoui double 0.000000e+00 to i32		; <i32>:97 [#uses=0]
+	fptosi double 0.000000e+00 to i64		; <i64>:98 [#uses=0]
+	fptrunc double 0.000000e+00 to float		; <float>:99 [#uses=0]
+	bitcast double 0.000000e+00 to double		; <double>:100 [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols.ll b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
new file mode 100644
index 0000000..ac1d9ac
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
@@ -0,0 +1,88 @@
+; RUN: %lli -use-mcjit -O0 -disable-lazy-compilation=false %s
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+; 
+; int main()
+; {
+;     zero_arr[zero_int + 5] = 40;
+; 
+;     if (zero_double < 1.0)
+;         zero_arr[zero_int + 2] = 70;
+; 
+;     for (int i = 1; i < 10; ++i) {
+;         zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+;     }
+;     return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @zero_int, align 4
+  %add = add nsw i32 %0, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+  store i32 40, i32* %arrayidx, align 4
+  %1 = load double* @zero_double, align 8
+  %cmp = fcmp olt double %1, 1.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* @zero_int, align 4
+  %add1 = add nsw i32 %2, 2
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+  store i32 70, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.end
+  %3 = load i32* %i, align 4
+  %cmp4 = icmp slt i32 %3, 10
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 %4, 1
+  %idxprom5 = sext i32 %sub to i64
+  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+  %5 = load i32* %arrayidx6, align 4
+  %6 = load i32* %i, align 4
+  %idxprom7 = sext i32 %6 to i64
+  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+  %7 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %5, %7
+  %8 = load i32* %i, align 4
+  %idxprom10 = sext i32 %8 to i64
+  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+  store i32 %add9, i32* %arrayidx11, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32* %i, align 4
+  %inc = add nsw i32 %9, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %cmp12 = icmp eq i32 %10, 110
+  %cond = select i1 %cmp12, i32 0, i32 -1
+  ret i32 %cond
+}
diff --git a/test/ExecutionEngine/MCJIT/test-constantexpr.ll b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
new file mode 100644
index 0000000..6b46639
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This tests to make sure that we can evaluate weird constant expressions
+
+@A = global i32 5		; <i32*> [#uses=1]
+@B = global i32 6		; <i32*> [#uses=1]
+
+define i32 @main() {
+	%A = or i1 false, icmp slt (i32* @A, i32* @B)		; <i1> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
new file mode 100644
index 0000000..35491df
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Q = fadd double %Y, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %Q, double* %DP
+	ret double %Y
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-fp.ll b/test/ExecutionEngine/MCJIT/test-fp.ll
new file mode 100644
index 0000000..6fc5a50
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -0,0 +1,23 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Z = frem double %Y, %Y		; <double> [#uses=3]
+	%Z1 = fdiv double %Z, %W		; <double> [#uses=0]
+	%Q = fadd double %Z, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %R, double* %DP
+	ret double %Z
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
new file mode 100644
index 0000000..4a790c6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-global.ll b/test/ExecutionEngine/MCJIT/test-global.ll
new file mode 100644
index 0000000..94e0250
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@count = global i32 0, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-loadstore.ll b/test/ExecutionEngine/MCJIT/test-loadstore.ll
new file mode 100644
index 0000000..e917149
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -0,0 +1,31 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
+	%V = load i8* %P		; <i8> [#uses=1]
+	store i8 %V, i8* %P
+	%V.upgrd.4 = load i16* %P.upgrd.1		; <i16> [#uses=1]
+	store i16 %V.upgrd.4, i16* %P.upgrd.1
+	%V.upgrd.5 = load i32* %P.upgrd.2		; <i32> [#uses=1]
+	store i32 %V.upgrd.5, i32* %P.upgrd.2
+	%V.upgrd.6 = load i64* %P.upgrd.3		; <i64> [#uses=1]
+	store i64 %V.upgrd.6, i64* %P.upgrd.3
+	ret void
+}
+
+define i32 @varalloca(i32 %Size) {
+        ;; Variable sized alloca
+	%X = alloca i32, i32 %Size		; <i32*> [#uses=2]
+	store i32 %Size, i32* %X
+	%Y = load i32* %X		; <i32> [#uses=1]
+	ret i32 %Y
+}
+
+define i32 @main() {
+	%A = alloca i8		; <i8*> [#uses=1]
+	%B = alloca i16		; <i16*> [#uses=1]
+	%C = alloca i32		; <i32*> [#uses=1]
+	%D = alloca i64		; <i64*> [#uses=1]
+	call void @test( i8* %A, i16* %B, i32* %C, i64* %D )
+	call i32 @varalloca( i32 7 )		; <i32>:1 [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-local.ll b/test/ExecutionEngine/MCJIT/test-local.ll
new file mode 100644
index 0000000..4f5ae47
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-local.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %count = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %count, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* %count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-logical.ll b/test/ExecutionEngine/MCJIT/test-logical.ll
new file mode 100644
index 0000000..0540c22
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-logical.ll
@@ -0,0 +1,18 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%A = and i8 4, 8		; <i8> [#uses=2]
+	%B = or i8 %A, 7		; <i8> [#uses=1]
+	%C = xor i8 %B, %A		; <i8> [#uses=0]
+	%A.upgrd.1 = and i16 4, 8		; <i16> [#uses=2]
+	%B.upgrd.2 = or i16 %A.upgrd.1, 7		; <i16> [#uses=1]
+	%C.upgrd.3 = xor i16 %B.upgrd.2, %A.upgrd.1		; <i16> [#uses=0]
+	%A.upgrd.4 = and i32 4, 8		; <i32> [#uses=2]
+	%B.upgrd.5 = or i32 %A.upgrd.4, 7		; <i32> [#uses=1]
+	%C.upgrd.6 = xor i32 %B.upgrd.5, %A.upgrd.4		; <i32> [#uses=0]
+	%A.upgrd.7 = and i64 4, 8		; <i64> [#uses=2]
+	%B.upgrd.8 = or i64 %A.upgrd.7, 7		; <i64> [#uses=1]
+	%C.upgrd.9 = xor i64 %B.upgrd.8, %A.upgrd.7		; <i64> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-loop.ll b/test/ExecutionEngine/MCJIT/test-loop.ll
new file mode 100644
index 0000000..b1dbf40
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-loop.ll
@@ -0,0 +1,14 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%I = phi i32 [ 0, %0 ], [ %i2, %Loop ]		; <i32> [#uses=1]
+	%i2 = add i32 %I, 1		; <i32> [#uses=2]
+	%C = icmp eq i32 %i2, 10		; <i1> [#uses=1]
+	br i1 %C, label %Out, label %Loop
+Out:		; preds = %Loop
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-phi.ll b/test/ExecutionEngine/MCJIT/test-phi.ll
new file mode 100644
index 0000000..fbc0808
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-phi.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test phi node
+@Y = global i32 6		; <i32*> [#uses=1]
+
+define void @blah(i32* %X) {
+; <label>:0
+	br label %T
+T:		; preds = %Dead, %0
+	phi i32* [ %X, %0 ], [ @Y, %Dead ]		; <i32*>:1 [#uses=0]
+	ret void
+Dead:		; No predecessors!
+	br label %T
+}
+
+define i32 @test(i1 %C) {
+; <label>:0
+	br i1 %C, label %T, label %T
+T:		; preds = %0, %0
+	%X = phi i32 [ 123, %0 ], [ 123, %0 ]		; <i32> [#uses=1]
+	ret i32 %X
+}
+
+define i32 @main() {
+; <label>:0
+	br label %Test
+Test:		; preds = %Dead, %0
+	%X = phi i32 [ 0, %0 ], [ %Y, %Dead ]		; <i32> [#uses=1]
+	ret i32 %X
+Dead:		; No predecessors!
+	%Y = ashr i32 12, 4		; <i32> [#uses=1]
+	br label %Test
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-ret.ll b/test/ExecutionEngine/MCJIT/test-ret.ll
new file mode 100644
index 0000000..1b90ee0
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-ret.ll
@@ -0,0 +1,46 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test return instructions
+define void @test1() {
+	ret void
+}
+
+define i8 @test2() {
+	ret i8 1
+}
+
+define i8 @test3() {
+	ret i8 1
+}
+
+define i16 @test4() {
+	ret i16 -1
+}
+
+define i16 @test5() {
+	ret i16 -1
+}
+
+define i32 @main() {
+	ret i32 0
+}
+
+define i32 @test6() {
+	ret i32 4
+}
+
+define i64 @test7() {
+	ret i64 0
+}
+
+define i64 @test8() {
+	ret i64 0
+}
+
+define float @test9() {
+	ret float 1.000000e+00
+}
+
+define double @test10() {
+	ret double 2.000000e+00
+}
diff --git a/test/ExecutionEngine/MCJIT/test-return.ll b/test/ExecutionEngine/MCJIT/test-return.ll
new file mode 100644
index 0000000..9c399ca
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-return.ll
@@ -0,0 +1,8 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
new file mode 100644
index 0000000..030ff31
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
@@ -0,0 +1,24 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+
+define i32 @main() {
+	%double1 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%double2 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%float1 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%float2 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%test49 = fcmp oeq float %float1, %float2		; <i1> [#uses=0]
+	%test50 = fcmp oge float %float1, %float2		; <i1> [#uses=0]
+	%test51 = fcmp ogt float %float1, %float2		; <i1> [#uses=0]
+	%test52 = fcmp ole float %float1, %float2		; <i1> [#uses=0]
+	%test53 = fcmp olt float %float1, %float2		; <i1> [#uses=0]
+	%test54 = fcmp une float %float1, %float2		; <i1> [#uses=0]
+	%test55 = fcmp oeq double %double1, %double2		; <i1> [#uses=0]
+	%test56 = fcmp oge double %double1, %double2		; <i1> [#uses=0]
+	%test57 = fcmp ogt double %double1, %double2		; <i1> [#uses=0]
+	%test58 = fcmp ole double %double1, %double2		; <i1> [#uses=0]
+	%test59 = fcmp olt double %double1, %double2		; <i1> [#uses=0]
+	%test60 = fcmp une double %double1, %double2		; <i1> [#uses=0]
+	ret i32 0
+}
+
+
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-int.ll b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
new file mode 100644
index 0000000..1113efe
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
@@ -0,0 +1,69 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%int1 = add i32 0, 0		; <i32> [#uses=6]
+	%int2 = add i32 0, 0		; <i32> [#uses=6]
+	%long1 = add i64 0, 0		; <i64> [#uses=6]
+	%long2 = add i64 0, 0		; <i64> [#uses=6]
+	%sbyte1 = add i8 0, 0		; <i8> [#uses=6]
+	%sbyte2 = add i8 0, 0		; <i8> [#uses=6]
+	%short1 = add i16 0, 0		; <i16> [#uses=6]
+	%short2 = add i16 0, 0		; <i16> [#uses=6]
+	%ubyte1 = add i8 0, 0		; <i8> [#uses=6]
+	%ubyte2 = add i8 0, 0		; <i8> [#uses=6]
+	%uint1 = add i32 0, 0		; <i32> [#uses=6]
+	%uint2 = add i32 0, 0		; <i32> [#uses=6]
+	%ulong1 = add i64 0, 0		; <i64> [#uses=6]
+	%ulong2 = add i64 0, 0		; <i64> [#uses=6]
+	%ushort1 = add i16 0, 0		; <i16> [#uses=6]
+	%ushort2 = add i16 0, 0		; <i16> [#uses=6]
+	%test1 = icmp eq i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test2 = icmp uge i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test3 = icmp ugt i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test4 = icmp ule i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test5 = icmp ult i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test6 = icmp ne i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test7 = icmp eq i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test8 = icmp uge i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test9 = icmp ugt i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test10 = icmp ule i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test11 = icmp ult i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test12 = icmp ne i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test13 = icmp eq i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test14 = icmp uge i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test15 = icmp ugt i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test16 = icmp ule i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test17 = icmp ult i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test18 = icmp ne i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test19 = icmp eq i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test20 = icmp uge i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test21 = icmp ugt i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test22 = icmp ule i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test23 = icmp ult i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test24 = icmp ne i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test25 = icmp eq i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test26 = icmp sge i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test27 = icmp sgt i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test28 = icmp sle i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test29 = icmp slt i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test30 = icmp ne i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test31 = icmp eq i16 %short1, %short2		; <i1> [#uses=0]
+	%test32 = icmp sge i16 %short1, %short2		; <i1> [#uses=0]
+	%test33 = icmp sgt i16 %short1, %short2		; <i1> [#uses=0]
+	%test34 = icmp sle i16 %short1, %short2		; <i1> [#uses=0]
+	%test35 = icmp slt i16 %short1, %short2		; <i1> [#uses=0]
+	%test36 = icmp ne i16 %short1, %short2		; <i1> [#uses=0]
+	%test37 = icmp eq i32 %int1, %int2		; <i1> [#uses=0]
+	%test38 = icmp sge i32 %int1, %int2		; <i1> [#uses=0]
+	%test39 = icmp sgt i32 %int1, %int2		; <i1> [#uses=0]
+	%test40 = icmp sle i32 %int1, %int2		; <i1> [#uses=0]
+	%test41 = icmp slt i32 %int1, %int2		; <i1> [#uses=0]
+	%test42 = icmp ne i32 %int1, %int2		; <i1> [#uses=0]
+	%test43 = icmp eq i64 %long1, %long2		; <i1> [#uses=0]
+	%test44 = icmp sge i64 %long1, %long2		; <i1> [#uses=0]
+	%test45 = icmp sgt i64 %long1, %long2		; <i1> [#uses=0]
+	%test46 = icmp sle i64 %long1, %long2		; <i1> [#uses=0]
+	%test47 = icmp slt i64 %long1, %long2		; <i1> [#uses=0]
+	%test48 = icmp ne i64 %long1, %long2		; <i1> [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-shift.ll b/test/ExecutionEngine/MCJIT/test-shift.ll
new file mode 100644
index 0000000..2da824f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-shift.ll
@@ -0,0 +1,32 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%shamt = add i8 0, 1		; <i8> [#uses=8]
+	%shift.upgrd.1 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%t1.s = shl i32 1, %shift.upgrd.1		; <i32> [#uses=0]
+	%t2.s = shl i32 1, 4		; <i32> [#uses=0]
+	%shift.upgrd.2 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%t1 = shl i32 1, %shift.upgrd.2		; <i32> [#uses=0]
+	%t2 = shl i32 1, 5		; <i32> [#uses=0]
+	%t2.s.upgrd.3 = shl i64 1, 4		; <i64> [#uses=0]
+	%t2.upgrd.4 = shl i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.5 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%tr1.s = ashr i32 1, %shift.upgrd.5		; <i32> [#uses=0]
+	%tr2.s = ashr i32 1, 4		; <i32> [#uses=0]
+	%shift.upgrd.6 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%tr1 = lshr i32 1, %shift.upgrd.6		; <i32> [#uses=0]
+	%tr2 = lshr i32 1, 5		; <i32> [#uses=0]
+	%tr1.l = ashr i64 1, 4		; <i64> [#uses=0]
+	%shift.upgrd.7 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr2.l = ashr i64 1, %shift.upgrd.7		; <i64> [#uses=0]
+	%tr3.l = shl i64 1, 4		; <i64> [#uses=0]
+	%shift.upgrd.8 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr4.l = shl i64 1, %shift.upgrd.8		; <i64> [#uses=0]
+	%tr1.u = lshr i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.9 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr2.u = lshr i64 1, %shift.upgrd.9		; <i64> [#uses=0]
+	%tr3.u = shl i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.10 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr4.u = shl i64 1, %shift.upgrd.10		; <i64> [#uses=0]
+	ret i32 0
+}
diff --git a/test/Feature/globalredefinition3.ll b/test/Feature/globalredefinition3.ll
index 5a5b3f1..2551d93 100644
--- a/test/Feature/globalredefinition3.ll
+++ b/test/Feature/globalredefinition3.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null |& grep {redefinition of global '@B'}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "redefinition of global '@B'"
 
 @B = global i32 7
 @B = global i32 7
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index c4e3db6..9e7dc6d 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s | llvm-dis > %t1.ll
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
+; RUN: FileCheck %s < %t1.ll
 
 declare i1 @llvm.isunordered.f32(float, float)
 
@@ -58,3 +59,12 @@ define void @libm() {
 }
 
 ; FIXME: test ALL the intrinsics in this file.
+
+; rdar://11542750
+; CHECK: declare void @llvm.trap() noreturn nounwind
+declare void @llvm.trap()
+
+define void @trap() {
+  call void @llvm.trap()
+  ret void
+}
diff --git a/test/Feature/load_module.ll b/test/Feature/load_module.ll
index 05f6c23..14c1153 100644
--- a/test/Feature/load_module.ll
+++ b/test/Feature/load_module.ll
@@ -1,6 +1,6 @@
 ; PR1318
 ; RUN: opt < %s -load=%llvmshlibdir/LLVMHello%shlibext -hello \
-; RUN:   -disable-output |& grep Hello
+; RUN:   -disable-output 2>&1 | grep Hello
 ; REQUIRES: loadable_module
 ; FIXME: On Cygming, it might fail without building LLVMHello manually.
 
diff --git a/test/Feature/packed_struct.ll b/test/Feature/packed_struct.ll
index 4d4ace9..0766649 100644
--- a/test/Feature/packed_struct.ll
+++ b/test/Feature/packed_struct.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 ; RUN: not grep cast %t2.ll
-; RUN: grep {\\}>} %t2.ll
+; RUN: grep "}>" %t2.ll
 ; END.
 
 %struct.anon = type <{ i8, i32, i32, i32 }>
diff --git a/test/Feature/vector-cast-constant-exprs.ll b/test/Feature/vector-cast-constant-exprs.ll
index ffdc0f0..992987c 100644
--- a/test/Feature/vector-cast-constant-exprs.ll
+++ b/test/Feature/vector-cast-constant-exprs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | not grep {ret.*(}
+; RUN: llvm-as < %s | llvm-dis | not grep "ret.*("
 
 ; All of these constant expressions should fold.
 
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
new file mode 100644
index 0000000..294ca8a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -0,0 +1,73 @@
+; Test basic address sanitizer instrumentation.
+;
+; RUN: opt < %s -asan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test_load(i32* %a) address_safety {
+; CHECK: @test_load
+; CHECK-NOT: load
+; CHECK:   %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK:   lshr i64 %[[LOAD_ADDR]], 3
+; CHECK:   or i64
+; CHECK:   %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
+; CHECK:   icmp ne i8
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+;
+; First instrumentation block refines the shadow test.
+; CHECK:   and i64 %[[LOAD_ADDR]], 7
+; CHECK:   add i64 %{{.*}}, 3
+; CHECK:   trunc i64 %{{.*}} to i8
+; CHECK:   icmp sge i8 %{{.*}}, %[[LOAD_SHADOW]]
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+;
+; The actual load comes next because ASan adds the crash block
+; to the end of the function.
+; CHECK:   %tmp1 = load i32* %a
+; CHECK:   ret i32 %tmp1
+
+; The crash block reports the error.
+; CHECK:   call void @__asan_report_load4(i64 %[[LOAD_ADDR]])
+; CHECK:   unreachable
+;
+
+
+entry:
+  %tmp1 = load i32* %a
+  ret i32 %tmp1
+}
+
+define void @test_store(i32* %a) address_safety {
+; CHECK: @test_store
+; CHECK-NOT: store
+; CHECK:   %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK:   lshr i64 %[[STORE_ADDR]], 3
+; CHECK:   or i64
+; CHECK:   %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK:   %[[STORE_SHADOW:[^ ]*]] = load i8* %[[STORE_SHADOW_PTR]]
+; CHECK:   icmp ne i8
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+;
+; First instrumentation block refines the shadow test.
+; CHECK:   and i64 %[[STORE_ADDR]], 7
+; CHECK:   add i64 %{{.*}}, 3
+; CHECK:   trunc i64 %{{.*}} to i8
+; CHECK:   icmp sge i8 %{{.*}}, %[[STORE_SHADOW]]
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+;
+; The actual load comes next because ASan adds the crash block
+; to the end of the function.
+; CHECK:   store i32 42, i32* %a
+; CHECK:   ret void
+;
+; The crash block reports the error.
+; CHECK:   call void @__asan_report_store4(i64 %[[STORE_ADDR]])
+; CHECK:   unreachable
+;
+
+entry:
+  store i32 42, i32* %a
+  ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index fc27de9..d544d77 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -12,3 +12,25 @@ entry:
 ; Check for ASAN's Offset for 64-bit (2^44)
 ; CHECK-NEXT: 17592186044416
 ; CHECK: ret
+
+define void @example_atomicrmw(i64* %ptr) nounwind uwtable address_safety {
+entry:
+  %0 = atomicrmw add i64* %ptr, i64 1 seq_cst
+  ret void
+}
+
+; CHECK: @example_atomicrmw
+; CHECK: lshr {{.*}} 3
+; CHECK: atomicrmw
+; CHECK: ret
+
+define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable address_safety {
+entry:
+  %0 = cmpxchg i64* %ptr, i64 %compare_to, i64 %new_value seq_cst
+  ret void
+}
+
+; CHECK: @example_cmpxchg
+; CHECK: lshr {{.*}} 3
+; CHECK: cmpxchg
+; CHECK: ret
diff --git a/test/Instrumentation/BoundsChecking/lit.local.cfg b/test/Instrumentation/BoundsChecking/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/BoundsChecking/many-trap.ll b/test/Instrumentation/BoundsChecking/many-trap.ll
new file mode 100644
index 0000000..0bbb959
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/many-trap.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -bounds-checking -S | FileCheck %s
+; RUN: opt < %s -bounds-checking -bounds-checking-single-trap -S | FileCheck -check-prefix=SINGLE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK: @f1
+define void @f1(i64 %x) nounwind {
+  %1 = alloca i128, i64 %x
+  %2 = load i128* %1, align 4
+  %3 = load i128* %1, align 4
+  ret void
+; CHECK: call void @llvm.trap()
+; CHECK: call void @llvm.trap()
+; CHECK-NOT: call void @llvm.trap()
+; SINGLE: call void @llvm.trap()
+; SINGLE-NOT: call void @llvm.trap()
+}
diff --git a/test/Instrumentation/BoundsChecking/phi.ll b/test/Instrumentation/BoundsChecking/phi.ll
new file mode 100644
index 0000000..86b5922
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/phi.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -bounds-checking -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@global = private unnamed_addr constant [10 x i8] c"ola\00mundo\00", align 1
+
+; CHECK: f1
+; no checks are possible here
+; CHECK-NOT: trap
+define void @f1(i8* nocapture %c) {
+entry:
+  %0 = load i8* %c, align 1
+  %tobool1 = icmp eq i8 %0, 0
+  br i1 %tobool1, label %while.end, label %while.body
+
+while.body:
+  %c.addr.02 = phi i8* [ %incdec.ptr, %while.body ], [ %c, %entry ]
+  %incdec.ptr = getelementptr inbounds i8* %c.addr.02, i64 -1
+  store i8 100, i8* %c.addr.02, align 1
+  %1 = load i8* %incdec.ptr, align 1
+  %tobool = icmp eq i8 %1, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+
+; CHECK: f2
+define void @f2() {
+while.body.i.preheader:
+  %addr = getelementptr inbounds [10 x i8]* @global, i64 0, i64 9
+  br label %while.body.i
+
+while.body.i:
+; CHECK: phi
+; CHECK-NEXT: phi
+; CHECK-NOT: phi
+  %c.addr.02.i = phi i8* [ %incdec.ptr.i, %while.body.i ], [ %addr, %while.body.i.preheader ]
+  %incdec.ptr.i = getelementptr inbounds i8* %c.addr.02.i, i64 -1
+; CHECK: sub i64 10, %0
+; CHECK-NEXT: icmp ult i64 10, %0
+; CHECK-NEXT: icmp ult i64 {{.*}}, 1
+; CHECK-NEXT: or i1
+; CHECK-NEXT: br {{.*}}, label %trap
+  store i8 100, i8* %c.addr.02.i, align 1
+  %0 = load i8* %incdec.ptr.i, align 1
+  %tobool.i = icmp eq i8 %0, 0
+  br i1 %tobool.i, label %fn.exit, label %while.body.i
+
+fn.exit:
+  ret void
+}
diff --git a/test/Instrumentation/BoundsChecking/simple.ll b/test/Instrumentation/BoundsChecking/simple.ll
new file mode 100644
index 0000000..16870c7
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/simple.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -bounds-checking -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@.str = private constant [8 x i8] c"abcdefg\00"   ; <[8 x i8]*>
+
+declare noalias i8* @malloc(i64) nounwind
+declare noalias i8* @calloc(i64, i64) nounwind
+declare noalias i8* @realloc(i8* nocapture, i64) nounwind
+
+; CHECK: @f1
+define void @f1() nounwind {
+  %1 = tail call i8* @malloc(i64 32)
+  %2 = bitcast i8* %1 to i32*
+  %idx = getelementptr inbounds i32* %2, i64 2
+; CHECK-NOT: trap
+  store i32 3, i32* %idx, align 4
+  ret void
+}
+
+; CHECK: @f2
+define void @f2() nounwind {
+  %1 = tail call i8* @malloc(i64 32)
+  %2 = bitcast i8* %1 to i32*
+  %idx = getelementptr inbounds i32* %2, i64 8
+; CHECK: trap
+  store i32 3, i32* %idx, align 4
+  ret void
+}
+
+; CHECK: @f3
+define void @f3(i64 %x) nounwind {
+  %1 = tail call i8* @calloc(i64 4, i64 %x)
+  %2 = bitcast i8* %1 to i32*
+  %idx = getelementptr inbounds i32* %2, i64 8
+; CHECK: mul i64 4, %
+; CHECK: sub i64 {{.*}}, 32
+; CHECK-NEXT: icmp ult i64 {{.*}}, 32
+; CHECK-NEXT: icmp ult i64 {{.*}}, 4
+; CHECK-NEXT: or i1
+; CHECK: trap
+  store i32 3, i32* %idx, align 4
+  ret void
+}
+
+; CHECK: @f4
+define void @f4(i64 %x) nounwind {
+  %1 = tail call i8* @realloc(i8* null, i64 %x) nounwind
+  %2 = bitcast i8* %1 to i32*
+  %idx = getelementptr inbounds i32* %2, i64 8
+; CHECK: trap
+  %3 = load i32* %idx, align 4
+  ret void
+}
+
+; CHECK: @f5
+define void @f5(i64 %x) nounwind {
+  %idx = getelementptr inbounds [8 x i8]* @.str, i64 0, i64 %x
+; CHECK: trap
+  %1 = load i8* %idx, align 4
+  ret void
+}
+
+; CHECK: @f6
+define void @f6(i64 %x) nounwind {
+  %1 = alloca i128
+; CHECK-NOT: trap
+  %2 = load i128* %1, align 4
+  ret void
+}
+
+; CHECK: @f7
+define void @f7(i64 %x) nounwind {
+  %1 = alloca i128, i64 %x
+; CHECK: mul i64 16,
+; CHECK: trap
+  %2 = load i128* %1, align 4
+  ret void
+}
+
+; CHECK: @f8
+define void @f8() nounwind {
+  %1 = alloca i128
+  %2 = alloca i128
+  %3 = select i1 undef, i128* %1, i128* %2
+; CHECK-NOT: trap
+  %4 = load i128* %3, align 4
+  ret void
+}
+
+; CHECK: @f9
+define void @f9(i128* %arg) nounwind {
+  %1 = alloca i128
+  %2 = select i1 undef, i128* %arg, i128* %1
+; CHECK-NOT: trap
+  %3 = load i128* %2, align 4
+  ret void
+}
+
+; CHECK: @f10
+define void @f10(i64 %x, i64 %y) nounwind {
+  %1 = alloca i128, i64 %x
+  %2 = alloca i128, i64 %y
+  %3 = select i1 undef, i128* %1, i128* %2
+; CHECK: select
+; CHECK: select
+; CHECK: trap
+  %4 = load i128* %3, align 4
+  ret void
+}
+
+; CHECK: @f11
+define void @f11(i128* byval %x) nounwind {
+  %1 = bitcast i128* %x to i8*
+  %2 = getelementptr inbounds i8* %1, i64 16
+; CHECK: br label
+  %3 = load i8* %2, align 4
+  ret void
+}
+
+; CHECK: @f12
+define i64 @f12(i64 %x, i64 %y) nounwind {
+  %1 = tail call i8* @calloc(i64 1, i64 %x)
+; CHECK: mul i64 %y, 8
+  %2 = bitcast i8* %1 to i64*
+  %3 = getelementptr inbounds i64* %2, i64 %y
+  %4 = load i64* %3, align 8
+  ret i64 %4
+}
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
new file mode 100644
index 0000000..02bf215
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -0,0 +1,323 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that atomic memory operations are converted to calls into ThreadSanitizer runtime.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @atomic8_load_unordered(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a unordered, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_unordered
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 1)
+
+define i8 @atomic8_load_monotonic(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a monotonic, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_monotonic
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 1)
+
+define i8 @atomic8_load_acquire(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a acquire, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_acquire
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 4)
+
+define i8 @atomic8_load_seq_cst(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a seq_cst, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_seq_cst
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 32)
+
+define void @atomic8_store_unordered(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a unordered, align 1
+  ret void
+}
+; CHECK: atomic8_store_unordered
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 1)
+
+define void @atomic8_store_monotonic(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a monotonic, align 1
+  ret void
+}
+; CHECK: atomic8_store_monotonic
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 1)
+
+define void @atomic8_store_release(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a release, align 1
+  ret void
+}
+; CHECK: atomic8_store_release
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 8)
+
+define void @atomic8_store_seq_cst(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a seq_cst, align 1
+  ret void
+}
+; CHECK: atomic8_store_seq_cst
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 32)
+
+define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a unordered, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_unordered
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 1)
+
+define i16 @atomic16_load_monotonic(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a monotonic, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_monotonic
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 1)
+
+define i16 @atomic16_load_acquire(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a acquire, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_acquire
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 4)
+
+define i16 @atomic16_load_seq_cst(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a seq_cst, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_seq_cst
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 32)
+
+define void @atomic16_store_unordered(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a unordered, align 2
+  ret void
+}
+; CHECK: atomic16_store_unordered
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 1)
+
+define void @atomic16_store_monotonic(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a monotonic, align 2
+  ret void
+}
+; CHECK: atomic16_store_monotonic
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 1)
+
+define void @atomic16_store_release(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a release, align 2
+  ret void
+}
+; CHECK: atomic16_store_release
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 8)
+
+define void @atomic16_store_seq_cst(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a seq_cst, align 2
+  ret void
+}
+; CHECK: atomic16_store_seq_cst
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 32)
+
+define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a unordered, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_unordered
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 1)
+
+define i32 @atomic32_load_monotonic(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a monotonic, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_monotonic
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 1)
+
+define i32 @atomic32_load_acquire(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a acquire, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_acquire
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 4)
+
+define i32 @atomic32_load_seq_cst(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a seq_cst, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_seq_cst
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 32)
+
+define void @atomic32_store_unordered(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a unordered, align 4
+  ret void
+}
+; CHECK: atomic32_store_unordered
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 1)
+
+define void @atomic32_store_monotonic(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a monotonic, align 4
+  ret void
+}
+; CHECK: atomic32_store_monotonic
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 1)
+
+define void @atomic32_store_release(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a release, align 4
+  ret void
+}
+; CHECK: atomic32_store_release
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 8)
+
+define void @atomic32_store_seq_cst(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a seq_cst, align 4
+  ret void
+}
+; CHECK: atomic32_store_seq_cst
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 32)
+
+define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a unordered, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_unordered
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 1)
+
+define i64 @atomic64_load_monotonic(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a monotonic, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_monotonic
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 1)
+
+define i64 @atomic64_load_acquire(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a acquire, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_acquire
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 4)
+
+define i64 @atomic64_load_seq_cst(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a seq_cst, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_seq_cst
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 32)
+
+define void @atomic64_store_unordered(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a unordered, align 8
+  ret void
+}
+; CHECK: atomic64_store_unordered
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 1)
+
+define void @atomic64_store_monotonic(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a monotonic, align 8
+  ret void
+}
+; CHECK: atomic64_store_monotonic
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 1)
+
+define void @atomic64_store_release(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a release, align 8
+  ret void
+}
+; CHECK: atomic64_store_release
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 8)
+
+define void @atomic64_store_seq_cst(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a seq_cst, align 8
+  ret void
+}
+; CHECK: atomic64_store_seq_cst
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 32)
+
+define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a unordered, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_unordered
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 1)
+
+define i128 @atomic128_load_monotonic(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a monotonic, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_monotonic
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 1)
+
+define i128 @atomic128_load_acquire(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a acquire, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_acquire
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 4)
+
+define i128 @atomic128_load_seq_cst(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a seq_cst, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_seq_cst
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 32)
+
+define void @atomic128_store_unordered(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a unordered, align 16
+  ret void
+}
+; CHECK: atomic128_store_unordered
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 1)
+
+define void @atomic128_store_monotonic(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a monotonic, align 16
+  ret void
+}
+; CHECK: atomic128_store_monotonic
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 1)
+
+define void @atomic128_store_release(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a release, align 16
+  ret void
+}
+; CHECK: atomic128_store_release
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 8)
+
+define void @atomic128_store_seq_cst(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a seq_cst, align 16
+  ret void
+}
+; CHECK: atomic128_store_seq_cst
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 32)
diff --git a/test/Integer/packed_struct_bt.ll b/test/Integer/packed_struct_bt.ll
index a4d01e7..257c1c6 100644
--- a/test/Integer/packed_struct_bt.ll
+++ b/test/Integer/packed_struct_bt.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 ; RUN: not grep cast %t2.ll
-; RUN: grep {\\}>} %t2.ll
+; RUN: grep "}>" %t2.ll
 ; END.
 
 %struct.anon = type <{ i8, i35, i35, i35 }>
diff --git a/test/Integer/varargs_bt.ll b/test/Integer/varargs_bt.ll
deleted file mode 100644
index 25ad58a..0000000
--- a/test/Integer/varargs_bt.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Demonstrate all of the variable argument handling intrinsic functions plus 
-; the va_arg instruction.
-
-declare void @llvm.va_start(i8** %ap)
-declare void @llvm.va_copy(i8** %aq, i8** %ap)
-declare void @llvm.va_end(i8** %ap)
-
-define i33 @test(i33 %X, ...) {
-        %ap = alloca i8*
-	call void @llvm.va_start(i8** %ap)
-	%tmp = va_arg i8** %ap, i33 
-
-        %aq = alloca i8*
-	call void @llvm.va_copy(i8** %aq, i8** %ap)
-	call void @llvm.va_end(i8** %aq)
-	
-	call void @llvm.va_end(i8** %ap)
-	ret i33 %tmp
-}
diff --git a/test/Integer/varargs_new_bt.ll b/test/Integer/varargs_new_bt.ll
deleted file mode 100644
index 59bb3f2..0000000
--- a/test/Integer/varargs_new_bt.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Demonstrate all of the variable argument handling intrinsic functions plus 
-; the va_arg instruction.
-
-declare void @llvm.va_start(i8**)
-declare void @llvm.va_copy(i8**, i8*)
-declare void @llvm.va_end(i8**)
-
-define i31 @test(i31 %X, ...) {
-        ; Allocate two va_list items.  On this target, va_list is of type i8*
-        %ap = alloca i8*             ; <i8**> [#uses=4]
-        %aq = alloca i8*             ; <i8**> [#uses=2]
-
-        ; Initialize variable argument processing
-        call void @llvm.va_start(i8** %ap)
-
-        ; Read a single integer argument
-        %tmp = va_arg i8** %ap, i31           ; <i31> [#uses=1]
-
-        ; Demonstrate usage of llvm.va_copy and llvm_va_end
-        %apv = load i8** %ap         ; <i8*> [#uses=1]
-        call void @llvm.va_copy(i8** %aq, i8* %apv)
-        call void @llvm.va_end(i8** %aq)
-
-        ; Stop processing of arguments.
-        call void @llvm.va_end(i8** %ap)
-        ret i31 %tmp
-
-}
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index cc34634..e7431ec 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -1,9 +1,9 @@
 ; This fails because the linker renames the external symbol not the internal 
 ; one...
 
-; RUN: echo {define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
+; RUN: echo "define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {@foo()} | grep -v internal
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "@foo()" | grep -v internal
 
 define i32 @foo() { ret i32 0 }
 
diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll
index 043457d..94fb5e0 100644
--- a/test/Linker/2003-01-30-LinkerTypeRename.ll
+++ b/test/Linker/2003-01-30-LinkerTypeRename.ll
@@ -1,9 +1,9 @@
 ; This fails because the linker renames the non-opaque type not the opaque
 ; one...
 
-; RUN: echo {%%Ty = type opaque @GV = external global %%Ty*} | llvm-as > %t.1.bc
+; RUN: echo "%%Ty = type opaque @GV = external global %%Ty*" | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {%%Ty } | not grep opaque
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "%%Ty " | not grep opaque
 
 %Ty = type {i32}
 
diff --git a/test/Linker/2003-04-21-Linkage.ll b/test/Linker/2003-04-21-Linkage.ll
deleted file mode 100644
index f6d4c4b..0000000
--- a/test/Linker/2003-04-21-Linkage.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: echo {@X = linkonce global i32 5 \
-; RUN:   define linkonce i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
-; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc  %t.2.bc
-@X = external global i32 
-
-declare i32 @foo() 
-
-define void @bar() {
-	load i32* @X
-	call i32 @foo()
-	ret void
-}
-
diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll
index beaf6ec..98a943a 100644
--- a/test/Linker/2003-04-23-LinkOnceLost.ll
+++ b/test/Linker/2003-04-23-LinkOnceLost.ll
@@ -1,7 +1,7 @@
 ; This fails because the linker renames the non-opaque type not the opaque 
 ; one...
 
-; RUN: echo { define linkonce void @foo() \{ ret void \} } | \
+; RUN: echo " define linkonce void @foo() { ret void } " | \
 ; RUN:   llvm-as -o %t.2.bc
 ; RUN: llvm-as %s -o %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep foo | grep linkonce
diff --git a/test/Linker/2003-04-26-NullPtrLinkProblem.ll b/test/Linker/2003-04-26-NullPtrLinkProblem.ll
index d23df1b..5e8249b 100644
--- a/test/Linker/2003-04-26-NullPtrLinkProblem.ll
+++ b/test/Linker/2003-04-26-NullPtrLinkProblem.ll
@@ -1,7 +1,7 @@
 ; This one fails because the LLVM runtime is allowing two null pointers of
 ; the same type to be created!
 
-; RUN: echo {%%T = type i32} | llvm-as > %t.2.bc
+; RUN: echo "%%T = type i32" | llvm-as > %t.2.bc
 ; RUN: llvm-as %s -o %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc
 
diff --git a/test/Linker/2003-05-15-TypeProblem.ll b/test/Linker/2003-05-15-TypeProblem.ll
index 18fcea0..c1fe334 100644
--- a/test/Linker/2003-05-15-TypeProblem.ll
+++ b/test/Linker/2003-05-15-TypeProblem.ll
@@ -1,7 +1,7 @@
 ; This one fails because the LLVM runtime is allowing two null pointers of
 ; the same type to be created!
 
-; RUN: echo {%M = type \{ %N*\} %N = type opaque} | llvm-as > %t.2.bc
+; RUN: echo "%M = type { %N*} %N = type opaque" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc
 
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index 80e0a69..dff861d 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -4,9 +4,9 @@
 ; the function name, we must rename the internal function to something that 
 ; does not conflict.
 
-; RUN: echo { define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
+; RUN: echo " define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep {@foo(}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep "@foo("
 
 declare i32 @foo() 
 
diff --git a/test/Linker/2003-06-02-TypeResolveProblem.ll b/test/Linker/2003-06-02-TypeResolveProblem.ll
index 0b0e9c1..fa24b6d 100644
--- a/test/Linker/2003-06-02-TypeResolveProblem.ll
+++ b/test/Linker/2003-06-02-TypeResolveProblem.ll
@@ -1,4 +1,4 @@
-; RUN: echo {%%T = type opaque} | llvm-as > %t.2.bc
+; RUN: echo "%%T = type opaque" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc
 
diff --git a/test/Linker/2003-06-02-TypeResolveProblem2.ll b/test/Linker/2003-06-02-TypeResolveProblem2.ll
index 3f9fd04..3ae23a2 100644
--- a/test/Linker/2003-06-02-TypeResolveProblem2.ll
+++ b/test/Linker/2003-06-02-TypeResolveProblem2.ll
@@ -1,4 +1,4 @@
-; RUN: echo {%%T = type i32} | llvm-as > %t.1.bc
+; RUN: echo "%%T = type i32" | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc
 
diff --git a/test/Linker/2003-08-20-OpaqueTypeResolve.ll b/test/Linker/2003-08-20-OpaqueTypeResolve.ll
index c0fc620..175146f 100644
--- a/test/Linker/2003-08-20-OpaqueTypeResolve.ll
+++ b/test/Linker/2003-08-20-OpaqueTypeResolve.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo {%M = type \{ i32, i32* \} } | llvm-as > %t.out2.bc
+; RUN: echo "%M = type { i32, i32* } " | llvm-as > %t.out2.bc
 ; RUN: llvm-link %t.out1.bc %t.out2.bc
 
 %M = type { i32, %N* }
diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll
index 255cb88..e934836 100644
--- a/test/Linker/2003-08-23-GlobalVarLinking.ll
+++ b/test/Linker/2003-08-23-GlobalVarLinking.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo {%%T1 = type opaque %%T2 = type opaque @S = external global \{ i32, %%T1* \} declare void @F(%%T2*)}\
+; RUN: echo "%%T1 = type opaque %%T2 = type opaque @S = external global { i32, %%T1* } declare void @F(%%T2*)"\
 ; RUN:   | llvm-as > %t.out2.bc
 ; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque
 
diff --git a/test/Linker/2003-08-24-InheritPtrSize.ll b/test/Linker/2003-08-24-InheritPtrSize.ll
index f93c054..51d544b 100644
--- a/test/Linker/2003-08-24-InheritPtrSize.ll
+++ b/test/Linker/2003-08-24-InheritPtrSize.ll
@@ -2,8 +2,8 @@
 ; specified pointer size should not cause a warning!
 
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo {} | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc |& not grep warning
+; RUN: echo "" | llvm-as > %t.out2.bc
+; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | not grep warning
 
 target datalayout = "e-p:64:64"
 
diff --git a/test/Linker/2004-12-03-DisagreeingType.ll b/test/Linker/2004-12-03-DisagreeingType.ll
index 570bda8..73d7a40 100644
--- a/test/Linker/2004-12-03-DisagreeingType.ll
+++ b/test/Linker/2004-12-03-DisagreeingType.ll
@@ -1,7 +1,7 @@
-; RUN: echo {@G = weak global \{\{\{\{double\}\}\}\} zeroinitializer } | \
+; RUN: echo "@G = weak global {{{{double}}}} zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep {\\}}
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep "}"
 
 ; When linked, the global above should be eliminated, being merged with the 
 ; global below.
diff --git a/test/Linker/2005-02-12-ConstantGlobals-2.ll b/test/Linker/2005-02-12-ConstantGlobals-2.ll
index 2ceae31..30bfafe 100644
--- a/test/Linker/2005-02-12-ConstantGlobals-2.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals-2.ll
@@ -1,8 +1,8 @@
 ; Test that a prototype can be marked const, and the definition is allowed
 ; to be nonconst.
 
-; RUN: echo {@X = external constant i32} | llvm-as > %t.2.bc
+; RUN: echo "@X = external constant i32" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
 
 @X = global i32 7
diff --git a/test/Linker/2005-02-12-ConstantGlobals.ll b/test/Linker/2005-02-12-ConstantGlobals.ll
index 60f176b..93709cf 100644
--- a/test/Linker/2005-02-12-ConstantGlobals.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals.ll
@@ -1,8 +1,8 @@
 ; Test that a prototype can be marked const, and the definition is allowed
 ; to be nonconst.
 
-; RUN: echo {@X = global i32 7} | llvm-as > %t.2.bc
+; RUN: echo "@X = global i32 7" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
 
 @X = external constant i32		; <i32*> [#uses=0]
diff --git a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
index 7d1020d..d7a34c8 100644
--- a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
+++ b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
@@ -1,7 +1,7 @@
-; RUN: echo { @G = appending global \[0 x i32\] zeroinitializer } | \
+; RUN: echo " @G = appending global [0 x i32] zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep {@G =}
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep "@G ="
 
 ; When linked, the globals should be merged, and the result should still 
 ; be named '@G'.
diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll
index df3284b..eec8f63 100644
--- a/test/Linker/2006-06-15-GlobalVarAlignment.ll
+++ b/test/Linker/2006-06-15-GlobalVarAlignment.ll
@@ -1,7 +1,7 @@
 ; The linker should choose the largest alignment when linking.
 
-; RUN: echo {@X = global i32 7, align 8} | llvm-as > %t.2.bc
+; RUN: echo "@X = global i32 7, align 8" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {align 8}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "align 8"
 
 @X = weak global i32 7, align 4
diff --git a/test/Linker/2008-03-07-DroppedSection_a.ll b/test/Linker/2008-03-07-DroppedSection_a.ll
index 4458971..ec9d5c2 100644
--- a/test/Linker/2008-03-07-DroppedSection_a.ll
+++ b/test/Linker/2008-03-07-DroppedSection_a.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_b.ll > %t2.bc
-; RUN: llvm-ld -r -disable-opt %t.bc %t2.bc -o %t3.bc
+; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
 ; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
 
 ; ModuleID = 't.bc'
diff --git a/test/Linker/2008-03-07-DroppedSection_b.ll b/test/Linker/2008-03-07-DroppedSection_b.ll
index 884bf0a..63b64f6 100644
--- a/test/Linker/2008-03-07-DroppedSection_b.ll
+++ b/test/Linker/2008-03-07-DroppedSection_b.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_a.ll > %t2.bc
-; RUN: llvm-ld -r -disable-opt %t.bc %t2.bc -o %t3.bc
+; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
 ; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
 
 ; ModuleID = 'u.bc'
diff --git a/test/Linker/2008-06-13-LinkOnceRedefinition.ll b/test/Linker/2008-06-13-LinkOnceRedefinition.ll
index 49da96a..da4b48d 100644
--- a/test/Linker/2008-06-13-LinkOnceRedefinition.ll
+++ b/test/Linker/2008-06-13-LinkOnceRedefinition.ll
@@ -2,7 +2,7 @@
 ; in different modules.
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: llvm-as %s -o %t.foo2.bc
-; RUN: echo {define linkonce void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc
+; RUN: echo "define linkonce void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc
 ; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S
 ; RUN: llvm-link %t.foo1.bc %t.foo3.bc -S
 define linkonce void @foo() { ret void }
diff --git a/test/Linker/2008-06-26-AddressSpace.ll b/test/Linker/2008-06-26-AddressSpace.ll
index e3ed385..e1d3574 100644
--- a/test/Linker/2008-06-26-AddressSpace.ll
+++ b/test/Linker/2008-06-26-AddressSpace.ll
@@ -2,8 +2,8 @@
 ; in different modules.
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: echo | llvm-as -o %t.foo2.bc
-; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep {addrspace(2)}
-; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep {addrspace(2)}
+; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep "addrspace(2)"
+; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep "addrspace(2)"
 ; rdar://6038021
 
 @G = addrspace(2) global i32 256 
diff --git a/test/Linker/AppendingLinkage.ll b/test/Linker/AppendingLinkage.ll
index 134a42e..014ead9 100644
--- a/test/Linker/AppendingLinkage.ll
+++ b/test/Linker/AppendingLinkage.ll
@@ -1,6 +1,6 @@
 ; Test that appending linkage works correctly.
 
-; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \
+; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 4 | grep 8
diff --git a/test/Linker/AppendingLinkage2.ll b/test/Linker/AppendingLinkage2.ll
index 2c1302f..7385efb 100644
--- a/test/Linker/AppendingLinkage2.ll
+++ b/test/Linker/AppendingLinkage2.ll
@@ -1,6 +1,6 @@
 ; Test that appending linkage works correctly when arrays are the same size.
 
-; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \
+; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 8
diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll
index 8fdbe50..716eb3d 100644
--- a/test/Linker/ConstantGlobals1.ll
+++ b/test/Linker/ConstantGlobals1.ll
@@ -1,6 +1,6 @@
 ; Test that appending linkage works correctly when arrays are the same size.
 
-; RUN: echo {@X = constant \[1 x i32\] \[i32 8\] } | \
+; RUN: echo "@X = constant [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll
index ad4428b..ad0f8e2 100644
--- a/test/Linker/ConstantGlobals2.ll
+++ b/test/Linker/ConstantGlobals2.ll
@@ -1,6 +1,6 @@
 ; Test that appending linkage works correctly when arrays are the same size.
 
-; RUN: echo {@X = external global \[1 x i32\] } | \
+; RUN: echo "@X = external global [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll
index e25529a..5aa26bc 100644
--- a/test/Linker/ConstantGlobals3.ll
+++ b/test/Linker/ConstantGlobals3.ll
@@ -1,6 +1,6 @@
 ; Test that appending linkage works correctly when arrays are the same size.
 
-; RUN: echo {@X = external constant \[1 x i32\] } | \
+; RUN: echo "@X = external constant [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
diff --git a/test/Linker/Inputs/PR11464.a.ll b/test/Linker/Inputs/PR11464.a.ll
new file mode 100644
index 0000000..25a9350
--- /dev/null
+++ b/test/Linker/Inputs/PR11464.a.ll
@@ -0,0 +1,3 @@
+%bug_type = type opaque
+declare i32 @bug_a(%bug_type*)
+declare i32 @bug_b(%bug_type*)
diff --git a/test/Linker/Inputs/PR11464.b.ll b/test/Linker/Inputs/PR11464.b.ll
new file mode 100644
index 0000000..7ef5a36
--- /dev/null
+++ b/test/Linker/Inputs/PR11464.b.ll
@@ -0,0 +1,13 @@
+%bug_type = type { %bug_type* }
+%bar = type { i32 }
+
+define i32 @bug_a(%bug_type* %fp) nounwind uwtable {
+entry:
+  %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0
+  ret i32 0
+}
+
+define i32 @bug_b(%bar* %a) nounwind uwtable {
+entry:
+  ret i32 0
+}
diff --git a/test/Linker/Inputs/PR8300.a.ll b/test/Linker/Inputs/PR8300.a.ll
new file mode 100644
index 0000000..c705db3
--- /dev/null
+++ b/test/Linker/Inputs/PR8300.a.ll
@@ -0,0 +1,2 @@
+%foo2 = type { [8 x i8] }
+declare void @zed(%foo2*)
diff --git a/test/Linker/Inputs/PR8300.b.ll b/test/Linker/Inputs/PR8300.b.ll
new file mode 100644
index 0000000..9e538f5
--- /dev/null
+++ b/test/Linker/Inputs/PR8300.b.ll
@@ -0,0 +1,9 @@
+%foo = type { [8 x i8] }
+%bar = type { [9 x i8] }
+
+@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
+
+define void @xyz(%bar* %this) {
+entry:
+  ret void
+}
diff --git a/test/Linker/Inputs/basiclink.a.ll b/test/Linker/Inputs/basiclink.a.ll
new file mode 100644
index 0000000..997932d
--- /dev/null
+++ b/test/Linker/Inputs/basiclink.a.ll
@@ -0,0 +1,2 @@
+define i32* @foo(i32 %x) { ret i32* @baz }
+@baz = external global i32
diff --git a/test/Linker/Inputs/basiclink.b.ll b/test/Linker/Inputs/basiclink.b.ll
new file mode 100644
index 0000000..0d2abc7
--- /dev/null
+++ b/test/Linker/Inputs/basiclink.b.ll
@@ -0,0 +1,6 @@
+declare i32* @foo(...)
+define i32* @bar() {
+	%ret = call i32* (...)* @foo( i32 123 )
+	ret i32* %ret
+}
+@baz = global i32 0
diff --git a/test/Linker/Inputs/linkage.a.ll b/test/Linker/Inputs/linkage.a.ll
new file mode 100644
index 0000000..8a156f6
--- /dev/null
+++ b/test/Linker/Inputs/linkage.a.ll
@@ -0,0 +1,2 @@
+@X = linkonce global i32 5
+define linkonce i32 @foo() { ret i32 7 }
diff --git a/test/Linker/Inputs/linkage.b.ll b/test/Linker/Inputs/linkage.b.ll
new file mode 100644
index 0000000..0ada3f4
--- /dev/null
+++ b/test/Linker/Inputs/linkage.b.ll
@@ -0,0 +1,10 @@
+@X = external global i32 
+
+declare i32 @foo() 
+
+define void @bar() {
+	load i32* @X
+	call i32 @foo()
+	ret void
+}
+
diff --git a/test/Linker/PR8300.ll b/test/Linker/PR8300.ll
index f0fc1e7..7c03d5b 100644
--- a/test/Linker/PR8300.ll
+++ b/test/Linker/PR8300.ll
@@ -1,13 +1 @@
-; RUN: echo {%foo2 = type \{ \[8 x i8\] \} \
-; RUN:       declare void @zed(%foo2*) } > %t.ll
-; RUN: llvm-link %t.ll %s -o %t.bc
-
-%foo = type { [8 x i8] }
-%bar = type { [9 x i8] }
-
-@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
-
-define void @xyz(%bar* %this) {
-entry:
-  ret void
-}
+; RUN: llvm-link %S/Inputs/PR8300.a.ll %S/Inputs/PR8300.b.ll -o %t.bc
diff --git a/test/Linker/basiclink.ll b/test/Linker/basiclink.ll
index afe0320..804329a 100644
--- a/test/Linker/basiclink.ll
+++ b/test/Linker/basiclink.ll
@@ -1,13 +1,6 @@
 ; Test linking two functions with different prototypes and two globals 
 ; in different modules. This is for PR411
-; RUN: llvm-as %s -o %t.bar.bc
-; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \
-; RUN:   @baz = external global i32 } | llvm-as -o %t.foo.bc
-; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc
+; RUN: llvm-as %S/Inputs/basiclink.a.ll -o %t.foo.bc
+; RUN: llvm-as %S/Inputs/basiclink.b.ll -o %t.bar.bc
 ; RUN: llvm-link %t.foo.bc %t.bar.bc -o %t.bc
-declare i32* @foo(...)
-define i32* @bar() {
-	%ret = call i32* (...)* @foo( i32 123 )
-	ret i32* %ret
-}
-@baz = global i32 0
+; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc
diff --git a/test/Linker/link-archive.ll b/test/Linker/link-archive.ll
deleted file mode 100644
index 9251b4e..0000000
--- a/test/Linker/link-archive.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; Test linking of a bc file to an archive via llvm-ld. 
-; PR1434
-; RUN: rm -f %t.bar.a %t.foo.a
-; RUN: llvm-as %s -o %t.bar.bc
-; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \
-; RUN:   @baz = external global i32 } | llvm-as -o %t.foo.bc
-; RUN: llvm-ar rcf %t.foo.a %t.foo.bc
-; RUN: llvm-ar rcf %t.bar.a %t.bar.bc
-; RUN: llvm-ld -disable-opt %t.bar.bc %t.foo.a -o %t.bc 
-; RUN: llvm-ld -disable-opt %t.foo.bc %t.bar.a -o %t.bc
-declare i32* @foo(...)
-define i32* @bar() {
-	%ret = call i32* (...)* @foo( i32 123 )
-	ret i32* %ret
-}
-@baz = global i32 0
diff --git a/test/Linker/link-global-to-func.ll b/test/Linker/link-global-to-func.ll
index 2fc501d..9d969d7 100644
--- a/test/Linker/link-global-to-func.ll
+++ b/test/Linker/link-global-to-func.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as %s -o %t1.bc
-; RUN: echo {declare void @__eprintf(i8*, i8*, i32, i8*) noreturn     define void @foo() {      tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind       unreachable }} | llvm-as -o %t2.bc
+; RUN: echo "declare void @__eprintf(i8*, i8*, i32, i8*) noreturn     define void @foo() {      tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind       unreachable }" | llvm-as -o %t2.bc
 ; RUN: llvm-link %t2.bc %t1.bc -S | grep __eprintf
 ; RUN: llvm-link %t1.bc %t2.bc -S | grep __eprintf
 
diff --git a/test/Linker/link-messages.ll b/test/Linker/link-messages.ll
index 920782d..4e7ffbc 100644
--- a/test/Linker/link-messages.ll
+++ b/test/Linker/link-messages.ll
@@ -2,10 +2,9 @@
 ; that error is printed out.
 ; RUN: llvm-as %s -o %t.one.bc
 ; RUN: llvm-as %s -o %t.two.bc
-; RUN: not llvm-ld -disable-opt -link-as-library %t.one.bc %t.two.bc \
-; RUN:   -o %t.bc 2>%t.err 
-; RUN: grep "symbol multiply defined" %t.err
+; RUN: not llvm-link %t.one.bc %t.two.bc -o %t.bc 2>&1 | FileCheck %s
 
+; CHECK: symbol multiply defined
 define i32 @bar() {
-	ret i32 0
+  ret i32 0
 }
diff --git a/test/Linker/linkage.ll b/test/Linker/linkage.ll
new file mode 100644
index 0000000..c7309aa
--- /dev/null
+++ b/test/Linker/linkage.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-as %S/Inputs/linkage.a.ll -o %t.1.bc
+; RUN: llvm-as %S/Inputs/linkage.b.ll -o %t.2.bc
+; RUN: llvm-link %t.1.bc  %t.2.bc
diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll
index f411a56..a656c8b 100644
--- a/test/Linker/module-flags-4-a.ll
+++ b/test/Linker/module-flags-4-a.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - |& FileCheck %s
+; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - 2>&1 | FileCheck %s
 
 ; Test 'require' error.
 
diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll
index 2e59ecc..8d625cd 100644
--- a/test/Linker/module-flags-5-a.ll
+++ b/test/Linker/module-flags-5-a.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - |& FileCheck %s
+; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - 2>&1 | FileCheck %s
 
 ; Test the 'override' error.
 
diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll
index c3e0225..5329c43 100644
--- a/test/Linker/module-flags-6-a.ll
+++ b/test/Linker/module-flags-6-a.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - |& FileCheck %s
+; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - 2>&1 | FileCheck %s
 
 ; Test module flags error messages.
 
diff --git a/test/Linker/multiple-merged-structs.ll b/test/Linker/multiple-merged-structs.ll
index 348cd89..aa8204d 100644
--- a/test/Linker/multiple-merged-structs.ll
+++ b/test/Linker/multiple-merged-structs.ll
@@ -1,19 +1,2 @@
-; RUN: echo {%bug_type = type opaque \
-; RUN:     declare i32 @bug_a(%bug_type*) \
-; RUN:     declare i32 @bug_b(%bug_type*) } > %t.ll
-; RUN: llvm-link %t.ll %s
+; RUN: llvm-link %S/Inputs/PR11464.a.ll %S/Inputs/PR11464.b.ll
 ; PR11464
-
-%bug_type = type { %bug_type* }
-%bar = type { i32 }
-
-define i32 @bug_a(%bug_type* %fp) nounwind uwtable {
-entry:
-  %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0
-  ret i32 0
-}
-
-define i32 @bug_b(%bar* %a) nounwind uwtable {
-entry:
-  ret i32 0
-}
diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll
index 0d05689..23ba6a1 100644
--- a/test/Linker/redefinition.ll
+++ b/test/Linker/redefinition.ll
@@ -2,9 +2,9 @@
 ; in different modules.
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: llvm-as %s -o %t.foo2.bc
-; RUN: echo {define void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc
-; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc |& \
-; RUN:   grep {symbol multiply defined}
-; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc |& \
-; RUN:   grep {symbol multiply defined}
+; RUN: echo "define void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc
+; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | \
+; RUN:   grep "symbol multiply defined"
+; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | \
+; RUN:   grep "symbol multiply defined"
 define void @foo() { ret void }
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index aa38b12..3a72a48 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,9 +1,9 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/testlink1.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
-; RUN: llvm-dis < %t1.bc | grep {kallsyms_names = extern_weak}
-; RUN: llvm-dis < %t1.bc | grep {MyVar = external global i32}
-; RUN: llvm-dis < %t1.bc | grep {Inte = global i32}
+; RUN: llvm-dis < %t1.bc | grep "kallsyms_names = extern_weak"
+; RUN: llvm-dis < %t1.bc | grep "MyVar = external global i32"
+; RUN: llvm-dis < %t1.bc | grep "Inte = global i32"
 
 @kallsyms_names = extern_weak global [0 x i8]		; <[0 x i8]*> [#uses=0]
 @MyVar = extern_weak global i32		; <i32*> [#uses=0]
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
index 74dfb99..99eb3c5 100644
--- a/test/MC/ARM/arm_fixups.s
+++ b/test/MC/ARM/arm_fixups.s
@@ -15,3 +15,8 @@
 @ CHECK: @   fixup A - offset: 0, value: _foo, kind: fixup_arm_movw_lo16
 @ CHECK: movt	r9, :upper16:_foo       @ encoding: [A,0x90'A',0b0100AAAA,0xe3]
 @ CHECK: @   fixup A - offset: 0, value: _foo, kind: fixup_arm_movt_hi16
+
+    mov r2, fred
+
+@ CHECK: movw  r2, fred                 @ encoding: [A,0x20'A',0b0000AAAA,0xe3]
+@ CHECK: @   fixup A - offset: 0, value: fred, kind: fixup_arm_movw_lo16
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index 186954c..ce7e036 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -74,3 +74,6 @@
 @ CHECK: cpsie none, #0                @ encoding: [0x00,0x00,0x0a,0xf1]
         cpsie none, #0
 
+@ CHECK: strh r3, [r2, #-0]            @ encoding: [0xb0,0x30,0x42,0xe1]
+        strh r3, [r2, #-0]
+
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 4788ac7..5c2a214 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
   .syntax unified
   .globl _func
 
@@ -141,6 +141,14 @@ Lforward:
 @ CHECK: adr	r2, #3                  @ encoding: [0x03,0x20,0x8f,0xe2]
 @ CHECK: adr	r2, #-3                 @ encoding: [0x03,0x20,0x4f,0xe2]
 
+        adr r1, #-0x0
+        adr r1, #-0x12000000
+        adr r1, #0x12000000
+
+@ CHECK: adr	r1, #-0                 @ encoding: [0x00,0x10,0x4f,0xe2]
+@ CHECK: adr	r1, #-301989888         @ encoding: [0x12,0x14,0x4f,0xe2]
+@ CHECK: adr	r1, #301989888          @ encoding: [0x12,0x14,0x8f,0xe2]
+
 
 @------------------------------------------------------------------------------
 @ ADD
@@ -206,6 +214,11 @@ Lforward:
 @ CHECK: sub	r0, r0, #4              @ encoding: [0x04,0x00,0x40,0xe2]
 @ CHECK: sub	r4, r5, #21             @ encoding: [0x15,0x40,0x45,0xe2]
 
+    @ Test right shift by 32, which is encoded as 0
+    add r3, r1, r2, lsr #32
+    add r3, r1, r2, asr #32
+@ CHECK: add	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0x81,0xe0]
+@ CHECK: add	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x81,0xe0]
 
 @------------------------------------------------------------------------------
 @ AND
@@ -265,6 +278,12 @@ Lforward:
 @ CHECK: and	r6, r6, r7, ror r2      @ encoding: [0x77,0x62,0x06,0xe0]
 @ CHECK: and	r10, r10, r1, rrx       @ encoding: [0x61,0xa0,0x0a,0xe0]
 
+    @ Test right shift by 32, which is encoded as 0
+    and r3, r1, r2, lsr #32
+    and r3, r1, r2, asr #32
+@ CHECK: and	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0x01,0xe0]
+@ CHECK: and	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x01,0xe0]
+
 @------------------------------------------------------------------------------
 @ ASR
 @------------------------------------------------------------------------------
@@ -368,6 +387,12 @@ Lforward:
 @ CHECK: bic	r6, r6, r7, ror r2      @ encoding: [0x77,0x62,0xc6,0xe1]
 @ CHECK: bic	r10, r10, r1, rrx       @ encoding: [0x61,0xa0,0xca,0xe1]
 
+    @ Test right shift by 32, which is encoded as 0
+    bic r3, r1, r2, lsr #32
+    bic r3, r1, r2, asr #32
+@ CHECK: bic	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0xc1,0xe1]
+@ CHECK: bic	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0xc1,0xe1]
+
 @------------------------------------------------------------------------------
 @ BKPT
 @------------------------------------------------------------------------------
@@ -542,6 +567,23 @@ Lforward:
 @------------------------------------------------------------------------------
 @ DMB
 @------------------------------------------------------------------------------
+        dmb #0xf
+        dmb #0xe
+        dmb #0xd
+        dmb #0xc
+        dmb #0xb
+        dmb #0xa
+        dmb #0x9
+        dmb #0x8
+        dmb #0x7
+        dmb #0x6
+        dmb #0x5
+        dmb #0x4
+        dmb #0x3
+        dmb #0x2
+        dmb #0x1
+        dmb #0x0
+
         dmb sy
         dmb st
         dmb sh
@@ -558,6 +600,23 @@ Lforward:
 
 @ CHECK: dmb	sy                      @ encoding: [0x5f,0xf0,0x7f,0xf5]
 @ CHECK: dmb	st                      @ encoding: [0x5e,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0xd                    @ encoding: [0x5d,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0xc                    @ encoding: [0x5c,0xf0,0x7f,0xf5]
+@ CHECK: dmb	ish                     @ encoding: [0x5b,0xf0,0x7f,0xf5]
+@ CHECK: dmb	ishst                   @ encoding: [0x5a,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x9                    @ encoding: [0x59,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x8                    @ encoding: [0x58,0xf0,0x7f,0xf5]
+@ CHECK: dmb	nsh                     @ encoding: [0x57,0xf0,0x7f,0xf5]
+@ CHECK: dmb	nshst                   @ encoding: [0x56,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x5                    @ encoding: [0x55,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x4                    @ encoding: [0x54,0xf0,0x7f,0xf5]
+@ CHECK: dmb	osh                     @ encoding: [0x53,0xf0,0x7f,0xf5]
+@ CHECK: dmb	oshst                   @ encoding: [0x52,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x1                    @ encoding: [0x51,0xf0,0x7f,0xf5]
+@ CHECK: dmb	#0x0                    @ encoding: [0x50,0xf0,0x7f,0xf5]
+
+@ CHECK: dmb	sy                      @ encoding: [0x5f,0xf0,0x7f,0xf5]
+@ CHECK: dmb	st                      @ encoding: [0x5e,0xf0,0x7f,0xf5]
 @ CHECK: dmb	ish                     @ encoding: [0x5b,0xf0,0x7f,0xf5]
 @ CHECK: dmb	ish                     @ encoding: [0x5b,0xf0,0x7f,0xf5]
 @ CHECK: dmb	ishst                   @ encoding: [0x5a,0xf0,0x7f,0xf5]
@@ -573,6 +632,26 @@ Lforward:
 @------------------------------------------------------------------------------
 @ DSB
 @------------------------------------------------------------------------------
+        dsb #0xf
+        dsb #0xe
+        dsb #0xd
+        dsb #0xc
+        dsb #0xb
+        dsb #0xa
+        dsb #0x9
+        dsb #0x8
+        dsb #0x7
+        dsb #0x6
+        dsb #0x5
+        dsb #0x4
+        dsb #0x3
+        dsb #0x2
+        dsb #0x1
+        dsb #0x0
+
+        dsb 8
+        dsb 7
+
         dsb sy
         dsb st
         dsb sh
@@ -589,6 +668,26 @@ Lforward:
 
 @ CHECK: dsb	sy                      @ encoding: [0x4f,0xf0,0x7f,0xf5]
 @ CHECK: dsb	st                      @ encoding: [0x4e,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0xd                    @ encoding: [0x4d,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0xc                    @ encoding: [0x4c,0xf0,0x7f,0xf5]
+@ CHECK: dsb	ish                     @ encoding: [0x4b,0xf0,0x7f,0xf5]
+@ CHECK: dsb	ishst                   @ encoding: [0x4a,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x9                    @ encoding: [0x49,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x8                    @ encoding: [0x48,0xf0,0x7f,0xf5]
+@ CHECK: dsb	nsh                     @ encoding: [0x47,0xf0,0x7f,0xf5]
+@ CHECK: dsb	nshst                   @ encoding: [0x46,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x5                    @ encoding: [0x45,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x4                    @ encoding: [0x44,0xf0,0x7f,0xf5]
+@ CHECK: dsb	osh                     @ encoding: [0x43,0xf0,0x7f,0xf5]
+@ CHECK: dsb	oshst                   @ encoding: [0x42,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x1                    @ encoding: [0x41,0xf0,0x7f,0xf5]
+@ CHECK: dsb	#0x0                    @ encoding: [0x40,0xf0,0x7f,0xf5]
+
+@ CHECK: dsb	#0x8                    @ encoding: [0x48,0xf0,0x7f,0xf5]
+@ CHECK: dsb	nsh                     @ encoding: [0x47,0xf0,0x7f,0xf5]
+
+@ CHECK: dsb	sy                      @ encoding: [0x4f,0xf0,0x7f,0xf5]
+@ CHECK: dsb	st                      @ encoding: [0x4e,0xf0,0x7f,0xf5]
 @ CHECK: dsb	ish                     @ encoding: [0x4b,0xf0,0x7f,0xf5]
 @ CHECK: dsb	ish                     @ encoding: [0x4b,0xf0,0x7f,0xf5]
 @ CHECK: dsb	ishst                   @ encoding: [0x4a,0xf0,0x7f,0xf5]
@@ -601,6 +700,12 @@ Lforward:
 @ CHECK: dsb	oshst                   @ encoding: [0x42,0xf0,0x7f,0xf5]
 @ CHECK: dsb	sy                      @ encoding: [0x4f,0xf0,0x7f,0xf5]
 
+@ With capitals
+        dsb SY
+        dsb OSHST
+
+@ CHECK: dsb	sy                      @ encoding: [0x4f,0xf0,0x7f,0xf5]
+@ CHECK: dsb	oshst                   @ encoding: [0x42,0xf0,0x7f,0xf5]
 @------------------------------------------------------------------------------
 @ EOR
 @------------------------------------------------------------------------------
@@ -658,6 +763,11 @@ Lforward:
 @ CHECK: eor	r6, r6, r7, ror r9      @ encoding: [0x77,0x69,0x26,0xe0]
 @ CHECK: eor	r4, r4, r5, rrx         @ encoding: [0x65,0x40,0x24,0xe0]
 
+    @ Test right shift by 32, which is encoded as 0
+    eor r3, r1, r2, lsr #32
+    eor r3, r1, r2, asr #32
+@ CHECK: eor	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0x21,0xe0]
+@ CHECK: eor	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x21,0xe0]
 
 @------------------------------------------------------------------------------
 @ ISB
@@ -1205,6 +1315,12 @@ Lforward:
 @ CHECK: orrslt	r6, r6, r7, ror r9      @ encoding: [0x77,0x69,0x96,0xb1]
 @ CHECK: orrsgt	r4, r4, r5, rrx         @ encoding: [0x65,0x40,0x94,0xc1]
 
+    @ Test right shift by 32, which is encoded as 0
+    orr r3, r1, r2, lsr #32
+    orr r3, r1, r2, asr #32
+@ CHECK: orr	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0x81,0xe1]
+@ CHECK: orr	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x81,0xe1]
+
 @------------------------------------------------------------------------------
 @ PKH
 @------------------------------------------------------------------------------
@@ -2210,6 +2326,11 @@ Lforward:
 @ CHECK: sub	r6, r6, r7, asr r9      @ encoding: [0x57,0x69,0x46,0xe0]
 @ CHECK: sub	r6, r6, r7, ror r9      @ encoding: [0x77,0x69,0x46,0xe0]
 
+    @ Test right shift by 32, which is encoded as 0
+    sub r3, r1, r2, lsr #32
+    sub r3, r1, r2, asr #32
+@ CHECK: sub	r3, r1, r2, lsr #32     @ encoding: [0x22,0x30,0x41,0xe0]
+@ CHECK: sub	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x41,0xe0]
 
 @------------------------------------------------------------------------------
 @ SVC
@@ -2711,10 +2832,22 @@ Lforward:
         wfilt
         yield
         yieldne
-
-@ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3]
-@ CHECK: wfehi @ encoding: [0x02,0xf0,0x20,0x83]
-@ CHECK: wfi @ encoding: [0x03,0xf0,0x20,0xe3]
-@ CHECK: wfilt @ encoding: [0x03,0xf0,0x20,0xb3]
-@ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3]
-@ CHECK: yieldne @ encoding: [0x01,0xf0,0x20,0x13]
+        hint #5
+        hint #4
+        hint #3
+        hint #2
+        hint #1
+        hint #0
+
+@ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
+@ CHECK: wfehi                          @ encoding: [0x02,0xf0,0x20,0x83]
+@ CHECK: wfi                            @ encoding: [0x03,0xf0,0x20,0xe3]
+@ CHECK: wfilt                          @ encoding: [0x03,0xf0,0x20,0xb3]
+@ CHECK: yield                          @ encoding: [0x01,0xf0,0x20,0xe3]
+@ CHECK: yieldne                        @ encoding: [0x01,0xf0,0x20,0x13]
+@ CHECK: hint	#5                      @ encoding: [0x05,0xf0,0x20,0xe3]
+@ CHECK: sev                            @ encoding: [0x04,0xf0,0x20,0xe3]
+@ CHECK: wfi                            @ encoding: [0x03,0xf0,0x20,0xe3]
+@ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
+@ CHECK: yield                          @ encoding: [0x01,0xf0,0x20,0xe3]
+@ CHECK: nop                            @ encoding: [0x00,0xf0,0x20,0xe3]
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index bc2605c..4ee34ce 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -169,9 +169,9 @@ _func:
         bl _bar
         blx _baz
 
-@ CHECK: bl	_bar                    @ encoding: [A,0xf0'A',A,0xf8'A']
+@ CHECK: bl	_bar                    @ encoding: [A,0xf0'A',A,0xd0'A']
              @   fixup A - offset: 0, value: _bar, kind: fixup_arm_thumb_bl
-@ CHECK: blx	_baz                    @ encoding: [A,0xf0'A',A,0xe8'A']
+@ CHECK: blx	_baz                    @ encoding: [A,0xf0'A',A,0xc0'A']
              @   fixup A - offset: 0, value: _baz, kind: fixup_arm_thumb_blx
 
 
@@ -635,13 +635,3 @@ _func:
 @ CHECK: uxth	r1, r4                  @ encoding: [0xa1,0xb2]
 
 
-@------------------------------------------------------------------------------
-@ WFE/WFI/YIELD
-@------------------------------------------------------------------------------
-        wfe
-        wfi
-        yield
-
-@ CHECK: wfe                             @ encoding: [0x20,0xbf]
-@ CHECK: wfi                             @ encoding: [0x30,0xbf]
-@ CHECK: yield                           @ encoding: [0x10,0xbf]
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index d2e208b..23d9f59 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
   .syntax unified
   .globl _func
 
@@ -48,6 +48,7 @@ _func:
         adcs	r0, r1, r3, lsl #7
         adc.w	r0, r1, r3, lsr #31
         adcs.w	r0, r1, r3, asr #32
+        add r2, sp, ip
 
 @ CHECK: adc.w	r4, r5, r6              @ encoding: [0x45,0xeb,0x06,0x04]
 @ CHECK: adcs.w	r4, r5, r6              @ encoding: [0x55,0xeb,0x06,0x04]
@@ -57,6 +58,7 @@ _func:
 @ CHECK: adcs.w	r0, r1, r3, lsl #7      @ encoding: [0x51,0xeb,0xc3,0x10]
 @ CHECK: adc.w	r0, r1, r3, lsr #31     @ encoding: [0x41,0xeb,0xd3,0x70]
 @ CHECK: adcs.w	r0, r1, r3, asr #32     @ encoding: [0x51,0xeb,0x23,0x00]
+@ CHECK: add.w	r2, sp, r12             @ encoding: [0x0d,0xeb,0x0c,0x02]
 
 
 @------------------------------------------------------------------------------
@@ -78,6 +80,12 @@ _func:
         adds r2, r2, #56
         adds r2, #56
 
+        adds.w r2, #-16
+        adds.w r2, r2, #-16
+        addw r2, #-16
+        addw r2, #-16
+        addw r2, r2, #-16
+
 @ CHECK: itet	eq                      @ encoding: [0x0a,0xbf]
 @ CHECK: addeq	r1, r2, #4              @ encoding: [0x11,0x1d]
 @ CHECK: addwne	r5, r3, #1023           @ encoding: [0x03,0xf2,0xff,0x35]
@@ -94,6 +102,12 @@ _func:
 @ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
 @ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
 
+@ CHECK: subs.w	r2, r2, #16             @ encoding: [0xb2,0xf1,0x10,0x02]
+@ CHECK: subs.w	r2, r2, #16             @ encoding: [0xb2,0xf1,0x10,0x02]
+@ CHECK: subw	r2, r2, #16             @ encoding: [0xa2,0xf2,0x10,0x02]
+@ CHECK: subw	r2, r2, #16             @ encoding: [0xa2,0xf2,0x10,0x02]
+@ CHECK: subw	r2, r2, #16             @ encoding: [0xa2,0xf2,0x10,0x02]
+
 
 @------------------------------------------------------------------------------
 @ ADD (register)
@@ -121,9 +135,11 @@ _func:
 
         subw r11, pc, #3270
         adr.w r11, #-826
+        adr.w r1, #-0x0
 
 @ CHECK: subw	r11, pc, #3270          @ encoding: [0xaf,0xf6,0xc6,0x4b]
 @ CHECK: adr.w	r11, #-826              @ encoding: [0xaf,0xf2,0x3a,0x3b]
+@ CHECK: adr.w	r1, #-0                 @ encoding: [0xaf,0xf2,0x00,0x01]
 
 @------------------------------------------------------------------------------
 @ AND (immediate)
@@ -401,6 +417,23 @@ _func:
 @------------------------------------------------------------------------------
 @ DMB
 @------------------------------------------------------------------------------
+        dmb #0xf
+        dmb #0xe
+        dmb #0xd
+        dmb #0xc
+        dmb #0xb
+        dmb #0xa
+        dmb #0x9
+        dmb #0x8
+        dmb #0x7
+        dmb #0x6
+        dmb #0x5
+        dmb #0x4
+        dmb #0x3
+        dmb #0x2
+        dmb #0x1
+        dmb #0x0
+
         dmb sy
         dmb st
         dmb sh
@@ -417,6 +450,23 @@ _func:
 
 @ CHECK: dmb	sy                      @ encoding: [0xbf,0xf3,0x5f,0x8f]
 @ CHECK: dmb	st                      @ encoding: [0xbf,0xf3,0x5e,0x8f]
+@ CHECK: dmb	#0xd                    @ encoding: [0xbf,0xf3,0x5d,0x8f]
+@ CHECK: dmb	#0xc                    @ encoding: [0xbf,0xf3,0x5c,0x8f]
+@ CHECK: dmb	ish                     @ encoding: [0xbf,0xf3,0x5b,0x8f]
+@ CHECK: dmb	ishst                   @ encoding: [0xbf,0xf3,0x5a,0x8f]
+@ CHECK: dmb	#0x9                    @ encoding: [0xbf,0xf3,0x59,0x8f]
+@ CHECK: dmb	#0x8                    @ encoding: [0xbf,0xf3,0x58,0x8f]
+@ CHECK: dmb	nsh                     @ encoding: [0xbf,0xf3,0x57,0x8f]
+@ CHECK: dmb	nshst                   @ encoding: [0xbf,0xf3,0x56,0x8f]
+@ CHECK: dmb	#0x5                    @ encoding: [0xbf,0xf3,0x55,0x8f]
+@ CHECK: dmb	#0x4                    @ encoding: [0xbf,0xf3,0x54,0x8f]
+@ CHECK: dmb	osh                     @ encoding: [0xbf,0xf3,0x53,0x8f]
+@ CHECK: dmb	oshst                   @ encoding: [0xbf,0xf3,0x52,0x8f]
+@ CHECK: dmb	#0x1                    @ encoding: [0xbf,0xf3,0x51,0x8f]
+@ CHECK: dmb	#0x0                    @ encoding: [0xbf,0xf3,0x50,0x8f]
+
+@ CHECK: dmb	sy                      @ encoding: [0xbf,0xf3,0x5f,0x8f]
+@ CHECK: dmb	st                      @ encoding: [0xbf,0xf3,0x5e,0x8f]
 @ CHECK: dmb	ish                     @ encoding: [0xbf,0xf3,0x5b,0x8f]
 @ CHECK: dmb	ish                     @ encoding: [0xbf,0xf3,0x5b,0x8f]
 @ CHECK: dmb	ishst                   @ encoding: [0xbf,0xf3,0x5a,0x8f]
@@ -433,6 +483,23 @@ _func:
 @------------------------------------------------------------------------------
 @ DSB
 @------------------------------------------------------------------------------
+        dsb #0xf
+        dsb #0xe
+        dsb #0xd
+        dsb #0xc
+        dsb #0xb
+        dsb #0xa
+        dsb #0x9
+        dsb #0x8
+        dsb #0x7
+        dsb #0x6
+        dsb #0x5
+        dsb #0x4
+        dsb #0x3
+        dsb #0x2
+        dsb #0x1
+        dsb #0x0
+
         dsb sy
         dsb st
         dsb sh
@@ -449,6 +516,23 @@ _func:
 
 @ CHECK: dsb	sy                      @ encoding: [0xbf,0xf3,0x4f,0x8f]
 @ CHECK: dsb	st                      @ encoding: [0xbf,0xf3,0x4e,0x8f]
+@ CHECK: dsb	#0xd                    @ encoding: [0xbf,0xf3,0x4d,0x8f]
+@ CHECK: dsb	#0xc                    @ encoding: [0xbf,0xf3,0x4c,0x8f]
+@ CHECK: dsb	ish                     @ encoding: [0xbf,0xf3,0x4b,0x8f]
+@ CHECK: dsb	ishst                   @ encoding: [0xbf,0xf3,0x4a,0x8f]
+@ CHECK: dsb	#0x9                    @ encoding: [0xbf,0xf3,0x49,0x8f]
+@ CHECK: dsb	#0x8                    @ encoding: [0xbf,0xf3,0x48,0x8f]
+@ CHECK: dsb	nsh                     @ encoding: [0xbf,0xf3,0x47,0x8f]
+@ CHECK: dsb	nshst                   @ encoding: [0xbf,0xf3,0x46,0x8f]
+@ CHECK: dsb	#0x5                    @ encoding: [0xbf,0xf3,0x45,0x8f]
+@ CHECK: dsb	#0x4                    @ encoding: [0xbf,0xf3,0x44,0x8f]
+@ CHECK: dsb	osh                     @ encoding: [0xbf,0xf3,0x43,0x8f]
+@ CHECK: dsb	oshst                   @ encoding: [0xbf,0xf3,0x42,0x8f]
+@ CHECK: dsb	#0x1                    @ encoding: [0xbf,0xf3,0x41,0x8f]
+@ CHECK: dsb	#0x0                    @ encoding: [0xbf,0xf3,0x40,0x8f]
+
+@ CHECK: dsb	sy                      @ encoding: [0xbf,0xf3,0x4f,0x8f]
+@ CHECK: dsb	st                      @ encoding: [0xbf,0xf3,0x4e,0x8f]
 @ CHECK: dsb	ish                     @ encoding: [0xbf,0xf3,0x4b,0x8f]
 @ CHECK: dsb	ish                     @ encoding: [0xbf,0xf3,0x4b,0x8f]
 @ CHECK: dsb	ishst                   @ encoding: [0xbf,0xf3,0x4a,0x8f]
@@ -509,6 +593,19 @@ _func:
 @ CHECK: subne	r5, r6, r7              @ encoding: [0xf5,0x1b]
 @ CHECK: addeq	r1, r2, #4              @ encoding: [0x11,0x1d]
 
+@ Should also work for UPPER CASE condition codes.
+
+        ITEET EQ
+        ADDEQ R0, R1, R2
+        NOPNE
+        SUBNE R5, R6, R7
+        ADDEQ R1, R2, #4
+
+@ CHECK: iteet	eq                      @ encoding: [0x0d,0xbf]
+@ CHECK: addeq	r0, r1, r2              @ encoding: [0x88,0x18]
+@ CHECK: nopne                          @ encoding: [0x00,0xbf]
+@ CHECK: subne	r5, r6, r7              @ encoding: [0xf5,0x1b]
+@ CHECK: addeq	r1, r2, #4              @ encoding: [0x11,0x1d]
 
 @------------------------------------------------------------------------------
 @ LDC{L}/LDC2{L}
@@ -755,6 +852,9 @@ _func:
         ldrd r3, r5, [r6], #-8
         ldrd r3, r5, [r6]
         ldrd r8, r1, [r3, #0]
+        ldrd r0, r1, [r2, #-0]
+        ldrd r0, r1, [r2, #-0]!
+        ldrd r0, r1, [r2], #-0
 
 @ CHECK: ldrd	r3, r5, [r6, #24]       @ encoding: [0xd6,0xe9,0x06,0x35]
 @ CHECK: ldrd	r3, r5, [r6, #24]!      @ encoding: [0xf6,0xe9,0x06,0x35]
@@ -762,6 +862,9 @@ _func:
 @ CHECK: ldrd	r3, r5, [r6], #-8       @ encoding: [0x76,0xe8,0x02,0x35]
 @ CHECK: ldrd	r3, r5, [r6]            @ encoding: [0xd6,0xe9,0x00,0x35]
 @ CHECK: ldrd	r8, r1, [r3]            @ encoding: [0xd3,0xe9,0x00,0x81]
+@ CHECK: ldrd	r0, r1, [r2, #-0]       @ encoding: [0x52,0xe9,0x00,0x01]
+@ CHECK: ldrd	r0, r1, [r2, #-0]!      @ encoding: [0x72,0xe9,0x00,0x01]
+@ CHECK: ldrd	r0, r1, [r2], #-0       @ encoding: [0x72,0xe8,0x00,0x01]
 
 
 @------------------------------------------------------------------------------
@@ -2539,6 +2642,9 @@ _func:
         strd r3, r5, [r6], #-8
         strd r3, r5, [r6]
         strd r8, r1, [r3, #0]
+        strd r0, r1, [r2, #-0]
+        strd r0, r1, [r2, #-0]!
+        strd r0, r1, [r2], #-0
 
 @ CHECK: strd	r3, r5, [r6, #24]       @ encoding: [0xc6,0xe9,0x06,0x35]
 @ CHECK: strd	r3, r5, [r6, #24]!      @ encoding: [0xe6,0xe9,0x06,0x35]
@@ -2546,6 +2652,9 @@ _func:
 @ CHECK: strd	r3, r5, [r6], #-8       @ encoding: [0x66,0xe8,0x02,0x35]
 @ CHECK: strd	r3, r5, [r6]            @ encoding: [0xc6,0xe9,0x00,0x35]
 @ CHECK: strd	r8, r1, [r3]            @ encoding: [0xc3,0xe9,0x00,0x81]
+@ CHECK: strd   r0, r1, [r2, #-0]       @ encoding: [0x42,0xe9,0x00,0x01]
+@ CHECK: strd   r0, r1, [r2, #-0]!      @ encoding: [0x62,0xe9,0x00,0x01]
+@ CHECK: strd   r0, r1, [r2], #-0       @ encoding: [0x62,0xe8,0x00,0x01]
 
 
 @------------------------------------------------------------------------------
@@ -3342,7 +3451,7 @@ _func:
 @ CHECK: uxth.w	r7, r8                  @ encoding: [0x1f,0xfa,0x88,0xf7]
 
 @------------------------------------------------------------------------------
-@ WFE/WFI/YIELD
+@ WFE/WFI/YIELD/HINT
 @------------------------------------------------------------------------------
         wfe
         wfi
@@ -3351,6 +3460,13 @@ _func:
         wfelt
         wfige
         yieldlt
+        hint #5
+        hint.w #5
+        hint.w #4
+        hint #3
+        hint #2
+        hint #1
+        hint #0
 
 @ CHECK: wfe                            @ encoding: [0x20,0xbf]
 @ CHECK: wfi                            @ encoding: [0x30,0xbf]
@@ -3359,6 +3475,13 @@ _func:
 @ CHECK: wfelt                          @ encoding: [0x20,0xbf]
 @ CHECK: wfige                          @ encoding: [0x30,0xbf]
 @ CHECK: yieldlt                        @ encoding: [0x10,0xbf]
+@ CHECK: hint	#5                      @ encoding: [0xaf,0xf3,0x05,0x80]
+@ CHECK: hint	#5                      @ encoding: [0xaf,0xf3,0x05,0x80]
+@ CHECK: sev.w                          @ encoding: [0xaf,0xf3,0x04,0x80]
+@ CHECK: wfi.w                          @ encoding: [0xaf,0xf3,0x03,0x80]
+@ CHECK: wfe.w                          @ encoding: [0xaf,0xf3,0x02,0x80]
+@ CHECK: yield.w                        @ encoding: [0xaf,0xf3,0x01,0x80]
+@ CHECK: nop.w                          @ encoding: [0xaf,0xf3,0x00,0x80]
 
 
 @------------------------------------------------------------------------------
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index 7da79c3..499e05501 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -70,8 +70,8 @@
         dbg #-1
         dbg #16
 
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 @  Double-check that we're synced up with the right diagnostics.
 @ CHECK-ERRORS: dbg #16
 
@@ -86,8 +86,8 @@
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 
 
         @ Out of range immediate for MOV
@@ -115,8 +115,8 @@
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 
         @ Shifter operand validation for PKH instructions.
         pkhbt r2, r2, r3, lsl #-1
@@ -315,3 +315,9 @@
 @ CHECK-ERRORS: error: coprocessor option must be an immediate in range [0, 255]
 @ CHECK-ERRORS:         ldc2 p2, c8, [r1], { -1 }
 @ CHECK-ERRORS:                              ^
+
+        @ Bad CPS instruction format.
+        cps f,#1
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS:         cps f,#1
+@ CHECK-ERRORS:               ^
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 6899d92..c98026b 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -61,7 +61,7 @@ bb3:                                              ; preds = %bb, %entry
 declare void @exit(i32) noreturn nounwind
 
 ;; OBJ:          Relocation 1
-;; OBJ-NEXT:     'r_offset', 
+;; OBJ-NEXT:     'r_offset',
 ;; OBJ-NEXT:     'r_sym', 0x000002
 ;; OBJ-NEXT:     'r_type', 0x2b
 
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index dcc62d3..08b4ecc 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -4,6 +4,8 @@
         bleq some_label
         bl some_label
         blx some_label
+        beq some_label
+        b some_label
 // OBJ: .rel.text
 
 // OBJ: 'r_offset', 0x00000000
@@ -18,6 +20,14 @@
 // OBJ-NEXT:  'r_sym', 0x000004
 // OBJ-NEXT: 'r_type', 0x1c
 
+// OBJ: 'r_offset', 0x0000000c
+// OBJ-NEXT:  'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1d
+
+// OBJ: 'r_offset', 0x00000010
+// OBJ-NEXT:  'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1d
+
 // OBJ: .symtab
 // OBJ: Symbol 4
-// OBJ-NEXT: some_label
-\ No newline at end of file
+// OBJ-NEXT: some_label
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index 2ce9bcc..e8c1dd6 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -30,11 +30,16 @@
 	vbic	q8, q8, q9
 	vbic.i32	d16, #0xFF000000
 	vbic.i32	q8, #0xFF000000
+        vbic q10, q11
+        vbic d9, d1
 
 @ CHECK: vbic	d16, d17, d16           @ encoding: [0xb0,0x01,0x51,0xf2]
 @ CHECK: vbic	q8, q8, q9              @ encoding: [0xf2,0x01,0x50,0xf2]
 @ CHECK: vbic.i32	d16, #0xff000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
 @ CHECK: vbic.i32	q8, #0xff000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
+@ CHECK: vbic	q10, q10, q11           @ encoding: [0xf6,0x41,0x54,0xf2]
+@ CHECK: vbic	d9, d9, d1              @ encoding: [0x11,0x91,0x19,0xf2]
+
 
 	vorn	d16, d17, d16
 	vorn	q8, q8, q9
@@ -232,32 +237,38 @@
 @ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
 
 @ Two-operand aliases
+	vand  q6, q5
 	vand.s8  q6, q5
 	vand.s16 q7, q1
 	vand.s32 q8, q2
 	vand.f64 q8, q2
 
+	veor   q6, q5
 	veor.8   q6, q5
 	veor.p16 q7, q1
 	veor.u32 q8, q2
 	veor.d   q8, q2
 
+	veor  q6, q5
 	veor.i8  q6, q5
 	veor.16  q7, q1
 	veor.f   q8, q2
 	veor.i64 q8, q2
 
 @ CHECK: vand	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf2]
+@ CHECK: vand	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf2]
 @ CHECK: vand	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf2]
 @ CHECK: vand	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf2]
 @ CHECK: vand	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf2]
 
 @ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
 @ CHECK: veor	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
 
 @ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
 @ CHECK: veor	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
diff --git a/test/MC/ARM/neon-shiftaccum-encoding.s b/test/MC/ARM/neon-shiftaccum-encoding.s
new file mode 100644
index 0000000..92ca7a3
--- /dev/null
+++ b/test/MC/ARM/neon-shiftaccum-encoding.s
@@ -0,0 +1,209 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+	vsra.s8 d17, d16, #8
+	vsra.s16 d15, d14, #16
+	vsra.s32 d13, d12, #32
+	vsra.s64 d11, d10, #64
+	vsra.s8 q7, q2, #8
+	vsra.s16 q3, q6, #16
+	vsra.s32 q9, q5, #32
+	vsra.s64 q8, q4, #64
+	vsra.u8 d17, d16, #8
+	vsra.u16 d11, d14, #11
+	vsra.u32 d12, d15, #22
+	vsra.u64 d13, d16, #54
+	vsra.u8 q1, q7, #8
+	vsra.u16 q2, q7, #6
+	vsra.u32 q3, q6, #21
+	vsra.u64 q4, q5, #25
+
+        @ Two-operand syntax variant.
+	vsra.s8 d16, #8
+	vsra.s16 d14, #16
+	vsra.s32 d12, #32
+	vsra.s64 d10, #64
+	vsra.s8 q2, #8
+	vsra.s16 q6, #16
+	vsra.s32 q5, #32
+	vsra.s64 q4, #64
+	vsra.u8 d16, #8
+	vsra.u16 d14, #11
+	vsra.u32 d15, #22
+	vsra.u64 d16, #54
+	vsra.u8 q7, #8
+	vsra.u16 q7, #6
+	vsra.u32 q6, #21
+	vsra.u64 q5, #25
+
+@ CHECK: vsra.s8	d17, d16, #8    @ encoding: [0x30,0x11,0xc8,0xf2]
+@ CHECK: vsra.s16	d15, d14, #16   @ encoding: [0x1e,0xf1,0x90,0xf2]
+@ CHECK: vsra.s32	d13, d12, #32   @ encoding: [0x1c,0xd1,0xa0,0xf2]
+@ CHECK: vsra.s64	d11, d10, #64   @ encoding: [0x9a,0xb1,0x80,0xf2]
+@ CHECK: vsra.s8	q7, q2, #8      @ encoding: [0x54,0xe1,0x88,0xf2]
+@ CHECK: vsra.s16	q3, q6, #16     @ encoding: [0x5c,0x61,0x90,0xf2]
+@ CHECK: vsra.s32	q9, q5, #32     @ encoding: [0x5a,0x21,0xe0,0xf2]
+@ CHECK: vsra.s64	q8, q4, #64     @ encoding: [0xd8,0x01,0xc0,0xf2]
+@ CHECK: vsra.u8	d17, d16, #8    @ encoding: [0x30,0x11,0xc8,0xf3]
+@ CHECK: vsra.u16	d11, d14, #11   @ encoding: [0x1e,0xb1,0x95,0xf3]
+@ CHECK: vsra.u32	d12, d15, #22   @ encoding: [0x1f,0xc1,0xaa,0xf3]
+@ CHECK: vsra.u64	d13, d16, #54   @ encoding: [0xb0,0xd1,0x8a,0xf3]
+@ CHECK: vsra.u8	q1, q7, #8      @ encoding: [0x5e,0x21,0x88,0xf3]
+@ CHECK: vsra.u16	q2, q7, #6      @ encoding: [0x5e,0x41,0x9a,0xf3]
+@ CHECK: vsra.u32	q3, q6, #21     @ encoding: [0x5c,0x61,0xab,0xf3]
+@ CHECK: vsra.u64	q4, q5, #25     @ encoding: [0xda,0x81,0xa7,0xf3]
+
+@ CHECK: vsra.s8	d16, d16, #8            @ encoding: [0x30,0x01,0xc8,0xf2]
+@ CHECK: vsra.s16	d14, d14, #16   @ encoding: [0x1e,0xe1,0x90,0xf2]
+@ CHECK: vsra.s32	d12, d12, #32   @ encoding: [0x1c,0xc1,0xa0,0xf2]
+@ CHECK: vsra.s64	d10, d10, #64   @ encoding: [0x9a,0xa1,0x80,0xf2]
+@ CHECK: vsra.s8	q2, q2, #8              @ encoding: [0x54,0x41,0x88,0xf2]
+@ CHECK: vsra.s16	q6, q6, #16     @ encoding: [0x5c,0xc1,0x90,0xf2]
+@ CHECK: vsra.s32	q5, q5, #32     @ encoding: [0x5a,0xa1,0xa0,0xf2]
+@ CHECK: vsra.s64	q4, q4, #64     @ encoding: [0xd8,0x81,0x80,0xf2]
+@ CHECK: vsra.u8	d16, d16, #8            @ encoding: [0x30,0x01,0xc8,0xf3]
+@ CHECK: vsra.u16	d14, d14, #11   @ encoding: [0x1e,0xe1,0x95,0xf3]
+@ CHECK: vsra.u32	d15, d15, #22   @ encoding: [0x1f,0xf1,0xaa,0xf3]
+@ CHECK: vsra.u64	d16, d16, #54   @ encoding: [0xb0,0x01,0xca,0xf3]
+@ CHECK: vsra.u8	q7, q7, #8              @ encoding: [0x5e,0xe1,0x88,0xf3]
+@ CHECK: vsra.u16	q7, q7, #6      @ encoding: [0x5e,0xe1,0x9a,0xf3]
+@ CHECK: vsra.u32	q6, q6, #21     @ encoding: [0x5c,0xc1,0xab,0xf3]
+@ CHECK: vsra.u64	q5, q5, #25     @ encoding: [0xda,0xa1,0xa7,0xf3]
+
+	vrsra.s8 d5, d26, #8
+	vrsra.s16 d6, d25, #16
+	vrsra.s32 d7, d24, #32
+	vrsra.s64 d14, d23, #64
+	vrsra.u8 d15, d22, #8
+	vrsra.u16 d16, d21, #16
+	vrsra.u32 d17, d20, #32
+	vrsra.u64 d18, d19, #64
+	vrsra.s8 q1, q2, #8
+	vrsra.s16 q2, q3, #16
+	vrsra.s32 q3, q4, #32
+	vrsra.s64 q4, q5, #64
+	vrsra.u8 q5, q6, #8
+	vrsra.u16 q6, q7, #16
+	vrsra.u32 q7, q8, #32
+	vrsra.u64 q8, q9, #64
+
+        @ Two-operand syntax variant.
+	vrsra.s8 d26, #8
+	vrsra.s16 d25, #16
+	vrsra.s32 d24, #32
+	vrsra.s64 d23, #64
+	vrsra.u8 d22, #8
+	vrsra.u16 d21, #16
+	vrsra.u32 d20, #32
+	vrsra.u64 d19, #64
+	vrsra.s8 q2, #8
+	vrsra.s16 q3, #16
+	vrsra.s32 q4, #32
+	vrsra.s64 q5, #64
+	vrsra.u8 q6, #8
+	vrsra.u16 q7, #16
+	vrsra.u32 q8, #32
+	vrsra.u64 q9, #64
+
+@ CHECK: vrsra.s8	d5, d26, #8     @ encoding: [0x3a,0x53,0x88,0xf2]
+@ CHECK: vrsra.s16	d6, d25, #16    @ encoding: [0x39,0x63,0x90,0xf2]
+@ CHECK: vrsra.s32	d7, d24, #32    @ encoding: [0x38,0x73,0xa0,0xf2]
+@ CHECK: vrsra.s64	d14, d23, #64   @ encoding: [0xb7,0xe3,0x80,0xf2]
+@ CHECK: vrsra.u8	d15, d22, #8    @ encoding: [0x36,0xf3,0x88,0xf3]
+@ CHECK: vrsra.u16	d16, d21, #16   @ encoding: [0x35,0x03,0xd0,0xf3]
+@ CHECK: vrsra.u32	d17, d20, #32   @ encoding: [0x34,0x13,0xe0,0xf3]
+@ CHECK: vrsra.u64	d18, d19, #64   @ encoding: [0xb3,0x23,0xc0,0xf3]
+@ CHECK: vrsra.s8	q1, q2, #8      @ encoding: [0x54,0x23,0x88,0xf2]
+@ CHECK: vrsra.s16	q2, q3, #16     @ encoding: [0x56,0x43,0x90,0xf2]
+@ CHECK: vrsra.s32	q3, q4, #32     @ encoding: [0x58,0x63,0xa0,0xf2]
+@ CHECK: vrsra.s64	q4, q5, #64     @ encoding: [0xda,0x83,0x80,0xf2]
+@ CHECK: vrsra.u8	q5, q6, #8      @ encoding: [0x5c,0xa3,0x88,0xf3]
+@ CHECK: vrsra.u16	q6, q7, #16     @ encoding: [0x5e,0xc3,0x90,0xf3]
+@ CHECK: vrsra.u32	q7, q8, #32     @ encoding: [0x70,0xe3,0xa0,0xf3]
+@ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf3]
+
+@ CHECK: vrsra.s8	d26, d26, #8    @ encoding: [0x3a,0xa3,0xc8,0xf2]
+@ CHECK: vrsra.s16	d25, d25, #16   @ encoding: [0x39,0x93,0xd0,0xf2]
+@ CHECK: vrsra.s32	d24, d24, #32   @ encoding: [0x38,0x83,0xe0,0xf2]
+@ CHECK: vrsra.s64	d23, d23, #64   @ encoding: [0xb7,0x73,0xc0,0xf2]
+@ CHECK: vrsra.u8	d22, d22, #8    @ encoding: [0x36,0x63,0xc8,0xf3]
+@ CHECK: vrsra.u16	d21, d21, #16   @ encoding: [0x35,0x53,0xd0,0xf3]
+@ CHECK: vrsra.u32	d20, d20, #32   @ encoding: [0x34,0x43,0xe0,0xf3]
+@ CHECK: vrsra.u64	d19, d19, #64   @ encoding: [0xb3,0x33,0xc0,0xf3]
+@ CHECK: vrsra.s8	q2, q2, #8      @ encoding: [0x54,0x43,0x88,0xf2]
+@ CHECK: vrsra.s16	q3, q3, #16     @ encoding: [0x56,0x63,0x90,0xf2]
+@ CHECK: vrsra.s32	q4, q4, #32     @ encoding: [0x58,0x83,0xa0,0xf2]
+@ CHECK: vrsra.s64	q5, q5, #64     @ encoding: [0xda,0xa3,0x80,0xf2]
+@ CHECK: vrsra.u8	q6, q6, #8      @ encoding: [0x5c,0xc3,0x88,0xf3]
+@ CHECK: vrsra.u16	q7, q7, #16     @ encoding: [0x5e,0xe3,0x90,0xf3]
+@ CHECK: vrsra.u32	q8, q8, #32     @ encoding: [0x70,0x03,0xe0,0xf3]
+@ CHECK: vrsra.u64	q9, q9, #64     @ encoding: [0xf2,0x23,0xc0,0xf3]
+
+
+	vsli.8 d11, d12, #7
+	vsli.16 d12, d13, #15
+	vsli.32 d13, d14, #31
+	vsli.64 d14, d15, #63
+	vsli.8 q1, q8, #7
+	vsli.16 q2, q7, #15
+	vsli.32 q3, q4, #31
+	vsli.64 q4, q5, #63
+	vsri.8 d28, d11, #8
+	vsri.16 d26, d12, #16
+	vsri.32 d24, d13, #32
+	vsri.64 d21, d14, #64
+	vsri.8 q1, q8, #8
+	vsri.16 q5, q2, #16
+	vsri.32 q7, q4, #32
+	vsri.64 q9, q6, #64
+
+        @ Two-operand syntax variant.
+	vsli.8 d12, #7
+	vsli.16 d13, #15
+	vsli.32 d14, #31
+	vsli.64 d15, #63
+	vsli.8 q8, #7
+	vsli.16 q7, #15
+	vsli.32 q4, #31
+	vsli.64 q5, #63
+	vsri.8 d11, #8
+	vsri.16 d12, #16
+	vsri.32 d13, #32
+	vsri.64 d14, #64
+	vsri.8 q8, #8
+	vsri.16 q2, #16
+	vsri.32 q4, #32
+	vsri.64 q6, #64
+
+@ CHECK: vsli.8	d11, d12, #7            @ encoding: [0x1c,0xb5,0x8f,0xf3]
+@ CHECK: vsli.16	d12, d13, #15   @ encoding: [0x1d,0xc5,0x9f,0xf3]
+@ CHECK: vsli.32	d13, d14, #31   @ encoding: [0x1e,0xd5,0xbf,0xf3]
+@ CHECK: vsli.64	d14, d15, #63   @ encoding: [0x9f,0xe5,0xbf,0xf3]
+@ CHECK: vsli.8	q1, q8, #7              @ encoding: [0x70,0x25,0x8f,0xf3]
+@ CHECK: vsli.16	q2, q7, #15     @ encoding: [0x5e,0x45,0x9f,0xf3]
+@ CHECK: vsli.32	q3, q4, #31     @ encoding: [0x58,0x65,0xbf,0xf3]
+@ CHECK: vsli.64	q4, q5, #63     @ encoding: [0xda,0x85,0xbf,0xf3]
+@ CHECK: vsri.8	d28, d11, #8            @ encoding: [0x1b,0xc4,0xc8,0xf3]
+@ CHECK: vsri.16	d26, d12, #16   @ encoding: [0x1c,0xa4,0xd0,0xf3]
+@ CHECK: vsri.32	d24, d13, #32   @ encoding: [0x1d,0x84,0xe0,0xf3]
+@ CHECK: vsri.64	d21, d14, #64   @ encoding: [0x9e,0x54,0xc0,0xf3]
+@ CHECK: vsri.8	q1, q8, #8              @ encoding: [0x70,0x24,0x88,0xf3]
+@ CHECK: vsri.16	q5, q2, #16     @ encoding: [0x54,0xa4,0x90,0xf3]
+@ CHECK: vsri.32	q7, q4, #32     @ encoding: [0x58,0xe4,0xa0,0xf3]
+@ CHECK: vsri.64	q9, q6, #64     @ encoding: [0xdc,0x24,0xc0,0xf3]
+
+@ CHECK: vsli.8	d12, d12, #7            @ encoding: [0x1c,0xc5,0x8f,0xf3]
+@ CHECK: vsli.16	d13, d13, #15           @ encoding: [0x1d,0xd5,0x9f,0xf3]
+@ CHECK: vsli.32	d14, d14, #31           @ encoding: [0x1e,0xe5,0xbf,0xf3]
+@ CHECK: vsli.64	d15, d15, #63           @ encoding: [0x9f,0xf5,0xbf,0xf3]
+@ CHECK: vsli.8	q8, q8, #7              @ encoding: [0x70,0x05,0xcf,0xf3]
+@ CHECK: vsli.16	q7, q7, #15             @ encoding: [0x5e,0xe5,0x9f,0xf3]
+@ CHECK: vsli.32	q4, q4, #31             @ encoding: [0x58,0x85,0xbf,0xf3]
+@ CHECK: vsli.64	q5, q5, #63             @ encoding: [0xda,0xa5,0xbf,0xf3]
+@ CHECK: vsri.8	d11, d11, #8            @ encoding: [0x1b,0xb4,0x88,0xf3]
+@ CHECK: vsri.16	d12, d12, #16           @ encoding: [0x1c,0xc4,0x90,0xf3]
+@ CHECK: vsri.32	d13, d13, #32           @ encoding: [0x1d,0xd4,0xa0,0xf3]
+@ CHECK: vsri.64	d14, d14, #64           @ encoding: [0x9e,0xe4,0x80,0xf3]
+@ CHECK: vsri.8	q8, q8, #8              @ encoding: [0x70,0x04,0xc8,0xf3]
+@ CHECK: vsri.16	q2, q2, #16             @ encoding: [0x54,0x44,0x90,0xf3]
+@ CHECK: vsri.32	q4, q4, #32             @ encoding: [0x58,0x84,0xa0,0xf3]
+@ CHECK: vsri.64	q6, q6, #64             @ encoding: [0xdc,0xc4,0x80,0xf3]
diff --git a/test/MC/ARM/neon-sub-encoding.s b/test/MC/ARM/neon-sub-encoding.s
index 8eb38a5..be67aa8 100644
--- a/test/MC/ARM/neon-sub-encoding.s
+++ b/test/MC/ARM/neon-sub-encoding.s
@@ -158,3 +158,18 @@
 @ CHECK: vhsub.u8	q4, q4, q9      @ encoding: [0x62,0x82,0x08,0xf3]
 @ CHECK: vhsub.u16	q5, q5, q8      @ encoding: [0x60,0xa2,0x1a,0xf3]
 @ CHECK: vhsub.u32	q6, q6, q7      @ encoding: [0x4e,0xc2,0x2c,0xf3]
+
+
+	vsubw.s8  q6, d5
+	vsubw.s16 q7, d1
+	vsubw.s32 q8, d2
+	vsubw.u8  q6, d5
+	vsubw.u16 q7, d1
+	vsubw.u32 q8, d2
+
+@ CHECK: vsubw.s8	q6, q6, d5      @ encoding: [0x05,0xc3,0x8c,0xf2]
+@ CHECK: vsubw.s16	q7, q7, d1      @ encoding: [0x01,0xe3,0x9e,0xf2]
+@ CHECK: vsubw.s32	q8, q8, d2      @ encoding: [0x82,0x03,0xe0,0xf2]
+@ CHECK: vsubw.u8	q6, q6, d5      @ encoding: [0x05,0xc3,0x8c,0xf3]
+@ CHECK: vsubw.u16	q7, q7, d1      @ encoding: [0x01,0xe3,0x9e,0xf3]
+@ CHECK: vsubw.u32	q8, q8, d2      @ encoding: [0x82,0x03,0xe0,0xf3]
diff --git a/test/MC/ARM/neont2-absdiff-encoding.s b/test/MC/ARM/neont2-absdiff-encoding.s
index 4313483..ac2f9e7 100644
--- a/test/MC/ARM/neont2-absdiff-encoding.s
+++ b/test/MC/ARM/neont2-absdiff-encoding.s
@@ -1,4 +1,4 @@
-@RUN: llvm-mc -triple thumbv7-unknown-unknown -show-encoding < %s | FileCheck %s
+@RUN: llvm-mc -triple thumbv7-unknown-unknown -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
 
 .code 16
 
diff --git a/test/MC/ARM/neont2-dup-encoding.s b/test/MC/ARM/neont2-dup-encoding.s
index bf25d70..d6db496 100644
--- a/test/MC/ARM/neont2-dup-encoding.s
+++ b/test/MC/ARM/neont2-dup-encoding.s
@@ -1,4 +1,4 @@
-@RUN: llvm-mc -triple thumbv7-unknown-unknown -show-encoding < %s | FileCheck %s
+@RUN: llvm-mc -triple thumbv7-unknown-unknown -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
 
 .code 16
 
diff --git a/test/MC/ARM/neont2-shiftaccum-encoding.s b/test/MC/ARM/neont2-shiftaccum-encoding.s
index a3a18fc..3229b43 100644
--- a/test/MC/ARM/neont2-shiftaccum-encoding.s
+++ b/test/MC/ARM/neont2-shiftaccum-encoding.s
@@ -2,99 +2,211 @@
 
 .code 16
 
-@ CHECK: vsra.s8	d17, d16, #8            @ encoding: [0xc8,0xef,0x30,0x11]
-	vsra.s8	d17, d16, #8
-@ CHECK: vsra.s16	d17, d16, #16   @ encoding: [0xd0,0xef,0x30,0x11]
-	vsra.s16	d17, d16, #16
-@ CHECK: vsra.s32	d17, d16, #32   @ encoding: [0xe0,0xef,0x30,0x11]
-	vsra.s32	d17, d16, #32
-@ CHECK: vsra.s64	d17, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x11]
-	vsra.s64	d17, d16, #64
-@ CHECK: vsra.s8	q8, q9, #8              @ encoding: [0xc8,0xef,0x72,0x01]
-	vsra.s8	q8, q9, #8
-@ CHECK: vsra.s16	q8, q9, #16     @ encoding: [0xd0,0xef,0x72,0x01]
-	vsra.s16	q8, q9, #16
-@ CHECK: vsra.s32	q8, q9, #32     @ encoding: [0xe0,0xef,0x72,0x01]
-	vsra.s32	q8, q9, #32
-@ CHECK: vsra.s64	q8, q9, #64     @ encoding: [0xc0,0xef,0xf2,0x01]
-	vsra.s64	q8, q9, #64
-@ CHECK: vsra.u8	d17, d16, #8            @ encoding: [0xc8,0xff,0x30,0x11]
-	vsra.u8	d17, d16, #8
-@ CHECK: vsra.u16	d17, d16, #16   @ encoding: [0xd0,0xff,0x30,0x11]
-	vsra.u16	d17, d16, #16
-@ CHECK: vsra.u32	d17, d16, #32   @ encoding: [0xe0,0xff,0x30,0x11]
-	vsra.u32	d17, d16, #32
-@ CHECK: vsra.u64	d17, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x11]
-	vsra.u64	d17, d16, #64
-@ CHECK: vsra.u8	q8, q9, #8              @ encoding: [0xc8,0xff,0x72,0x01]
-	vsra.u8	q8, q9, #8
-@ CHECK: vsra.u16	q8, q9, #16     @ encoding: [0xd0,0xff,0x72,0x01]
-	vsra.u16	q8, q9, #16
-@ CHECK: vsra.u32	q8, q9, #32     @ encoding: [0xe0,0xff,0x72,0x01]
-	vsra.u32	q8, q9, #32
-@ CHECK: vsra.u64	q8, q9, #64     @ encoding: [0xc0,0xff,0xf2,0x01]
-	vsra.u64	q8, q9, #64
-@ CHECK: vrsra.s8	d17, d16, #8    @ encoding: [0xc8,0xef,0x30,0x13]
-	vrsra.s8	d17, d16, #8
-@ CHECK: vrsra.s16	d17, d16, #16   @ encoding: [0xd0,0xef,0x30,0x13]
-	vrsra.s16	d17, d16, #16
-@ CHECK: vrsra.s32	d17, d16, #32   @ encoding: [0xe0,0xef,0x30,0x13]
-	vrsra.s32	d17, d16, #32
-@ CHECK: vrsra.s64	d17, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x13]
-	vrsra.s64	d17, d16, #64
-@ CHECK: vrsra.u8	d17, d16, #8    @ encoding: [0xc8,0xff,0x30,0x13]
-	vrsra.u8	d17, d16, #8
-@ CHECK: vrsra.u16	d17, d16, #16   @ encoding: [0xd0,0xff,0x30,0x13]
-	vrsra.u16	d17, d16, #16
-@ CHECK: vrsra.u32	d17, d16, #32   @ encoding: [0xe0,0xff,0x30,0x13]
-	vrsra.u32	d17, d16, #32
-@ CHECK: vrsra.u64	d17, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x13]
-	vrsra.u64	d17, d16, #64
-@ CHECK: vrsra.s8	q8, q9, #8      @ encoding: [0xc8,0xef,0x72,0x03]
-	vrsra.s8	q8, q9, #8
-@ CHECK: vrsra.s16	q8, q9, #16     @ encoding: [0xd0,0xef,0x72,0x03]
-	vrsra.s16	q8, q9, #16
-@ CHECK: vrsra.s32	q8, q9, #32     @ encoding: [0xe0,0xef,0x72,0x03]
-	vrsra.s32	q8, q9, #32
-@ CHECK: vrsra.s64	q8, q9, #64     @ encoding: [0xc0,0xef,0xf2,0x03]
-	vrsra.s64	q8, q9, #64
-@ CHECK: vrsra.u8	q8, q9, #8      @ encoding: [0xc8,0xff,0x72,0x03]
-	vrsra.u8	q8, q9, #8
-@ CHECK: vrsra.u16	q8, q9, #16     @ encoding: [0xd0,0xff,0x72,0x03]
-	vrsra.u16	q8, q9, #16
-@ CHECK: vrsra.u32	q8, q9, #32     @ encoding: [0xe0,0xff,0x72,0x03]
-	vrsra.u32	q8, q9, #32
+	vsra.s8 d17, d16, #8
+	vsra.s16 d15, d14, #16
+	vsra.s32 d13, d12, #32
+	vsra.s64 d11, d10, #64
+	vsra.s8 q7, q2, #8
+	vsra.s16 q3, q6, #16
+	vsra.s32 q9, q5, #32
+	vsra.s64 q8, q4, #64
+	vsra.u8 d17, d16, #8
+	vsra.u16 d11, d14, #11
+	vsra.u32 d12, d15, #22
+	vsra.u64 d13, d16, #54
+	vsra.u8 q1, q7, #8
+	vsra.u16 q2, q7, #6
+	vsra.u32 q3, q6, #21
+	vsra.u64 q4, q5, #25
+
+        @ Two-operand syntax variant.
+	vsra.s8 d16, #8
+	vsra.s16 d14, #16
+	vsra.s32 d12, #32
+	vsra.s64 d10, #64
+	vsra.s8 q2, #8
+	vsra.s16 q6, #16
+	vsra.s32 q5, #32
+	vsra.s64 q4, #64
+	vsra.u8 d16, #8
+	vsra.u16 d14, #11
+	vsra.u32 d15, #22
+	vsra.u64 d16, #54
+	vsra.u8 q7, #8
+	vsra.u16 q7, #6
+	vsra.u32 q6, #21
+	vsra.u64 q5, #25
+
+@ CHECK: vsra.s8	d17, d16, #8    @ encoding: [0xc8,0xef,0x30,0x11]
+@ CHECK: vsra.s16	d15, d14, #16   @ encoding: [0x90,0xef,0x1e,0xf1]
+@ CHECK: vsra.s32	d13, d12, #32   @ encoding: [0xa0,0xef,0x1c,0xd1]
+@ CHECK: vsra.s64	d11, d10, #64   @ encoding: [0x80,0xef,0x9a,0xb1]
+@ CHECK: vsra.s8	q7, q2, #8      @ encoding: [0x88,0xef,0x54,0xe1]
+@ CHECK: vsra.s16	q3, q6, #16     @ encoding: [0x90,0xef,0x5c,0x61]
+@ CHECK: vsra.s32	q9, q5, #32     @ encoding: [0xe0,0xef,0x5a,0x21]
+@ CHECK: vsra.s64	q8, q4, #64     @ encoding: [0xc0,0xef,0xd8,0x01]
+@ CHECK: vsra.u8	d17, d16, #8    @ encoding: [0xc8,0xff,0x30,0x11]
+@ CHECK: vsra.u16	d11, d14, #11   @ encoding: [0x95,0xff,0x1e,0xb1]
+@ CHECK: vsra.u32	d12, d15, #22   @ encoding: [0xaa,0xff,0x1f,0xc1]
+@ CHECK: vsra.u64	d13, d16, #54   @ encoding: [0x8a,0xff,0xb0,0xd1]
+@ CHECK: vsra.u8	q1, q7, #8      @ encoding: [0x88,0xff,0x5e,0x21]
+@ CHECK: vsra.u16	q2, q7, #6      @ encoding: [0x9a,0xff,0x5e,0x41]
+@ CHECK: vsra.u32	q3, q6, #21     @ encoding: [0xab,0xff,0x5c,0x61]
+@ CHECK: vsra.u64	q4, q5, #25     @ encoding: [0xa7,0xff,0xda,0x81]
+
+@ CHECK: vsra.s8	d16, d16, #8            @ encoding: [0xc8,0xef,0x30,0x01]
+@ CHECK: vsra.s16	d14, d14, #16   @ encoding: [0x90,0xef,0x1e,0xe1]
+@ CHECK: vsra.s32	d12, d12, #32   @ encoding: [0xa0,0xef,0x1c,0xc1]
+@ CHECK: vsra.s64	d10, d10, #64   @ encoding: [0x80,0xef,0x9a,0xa1]
+@ CHECK: vsra.s8	q2, q2, #8              @ encoding: [0x88,0xef,0x54,0x41]
+@ CHECK: vsra.s16	q6, q6, #16     @ encoding: [0x90,0xef,0x5c,0xc1]
+@ CHECK: vsra.s32	q5, q5, #32     @ encoding: [0xa0,0xef,0x5a,0xa1]
+@ CHECK: vsra.s64	q4, q4, #64     @ encoding: [0x80,0xef,0xd8,0x81]
+@ CHECK: vsra.u8	d16, d16, #8            @ encoding: [0xc8,0xff,0x30,0x01]
+@ CHECK: vsra.u16	d14, d14, #11   @ encoding: [0x95,0xff,0x1e,0xe1]
+@ CHECK: vsra.u32	d15, d15, #22   @ encoding: [0xaa,0xff,0x1f,0xf1]
+@ CHECK: vsra.u64	d16, d16, #54   @ encoding: [0xca,0xff,0xb0,0x01]
+@ CHECK: vsra.u8	q7, q7, #8              @ encoding: [0x88,0xff,0x5e,0xe1]
+@ CHECK: vsra.u16	q7, q7, #6      @ encoding: [0x9a,0xff,0x5e,0xe1]
+@ CHECK: vsra.u32	q6, q6, #21     @ encoding: [0xab,0xff,0x5c,0xc1]
+@ CHECK: vsra.u64	q5, q5, #25     @ encoding: [0xa7,0xff,0xda,0xa1]
+
+
+	vrsra.s8 d5, d26, #8
+	vrsra.s16 d6, d25, #16
+	vrsra.s32 d7, d24, #32
+	vrsra.s64 d14, d23, #64
+	vrsra.u8 d15, d22, #8
+	vrsra.u16 d16, d21, #16
+	vrsra.u32 d17, d20, #32
+	vrsra.u64 d18, d19, #64
+	vrsra.s8 q1, q2, #8
+	vrsra.s16 q2, q3, #16
+	vrsra.s32 q3, q4, #32
+	vrsra.s64 q4, q5, #64
+	vrsra.u8 q5, q6, #8
+	vrsra.u16 q6, q7, #16
+	vrsra.u32 q7, q8, #32
+	vrsra.u64 q8, q9, #64
+
+        @ Two-operand syntax variant.
+	vrsra.s8 d26, #8
+	vrsra.s16 d25, #16
+	vrsra.s32 d24, #32
+	vrsra.s64 d23, #64
+	vrsra.u8 d22, #8
+	vrsra.u16 d21, #16
+	vrsra.u32 d20, #32
+	vrsra.u64 d19, #64
+	vrsra.s8 q2, #8
+	vrsra.s16 q3, #16
+	vrsra.s32 q4, #32
+	vrsra.s64 q5, #64
+	vrsra.u8 q6, #8
+	vrsra.u16 q7, #16
+	vrsra.u32 q8, #32
+	vrsra.u64 q9, #64
+
+@ CHECK: vrsra.s8	d5, d26, #8     @ encoding: [0x88,0xef,0x3a,0x53]
+@ CHECK: vrsra.s16	d6, d25, #16    @ encoding: [0x90,0xef,0x39,0x63]
+@ CHECK: vrsra.s32	d7, d24, #32    @ encoding: [0xa0,0xef,0x38,0x73]
+@ CHECK: vrsra.s64	d14, d23, #64   @ encoding: [0x80,0xef,0xb7,0xe3]
+@ CHECK: vrsra.u8	d15, d22, #8    @ encoding: [0x88,0xff,0x36,0xf3]
+@ CHECK: vrsra.u16	d16, d21, #16   @ encoding: [0xd0,0xff,0x35,0x03]
+@ CHECK: vrsra.u32	d17, d20, #32   @ encoding: [0xe0,0xff,0x34,0x13]
+@ CHECK: vrsra.u64	d18, d19, #64   @ encoding: [0xc0,0xff,0xb3,0x23]
+@ CHECK: vrsra.s8	q1, q2, #8      @ encoding: [0x88,0xef,0x54,0x23]
+@ CHECK: vrsra.s16	q2, q3, #16     @ encoding: [0x90,0xef,0x56,0x43]
+@ CHECK: vrsra.s32	q3, q4, #32     @ encoding: [0xa0,0xef,0x58,0x63]
+@ CHECK: vrsra.s64	q4, q5, #64     @ encoding: [0x80,0xef,0xda,0x83]
+@ CHECK: vrsra.u8	q5, q6, #8      @ encoding: [0x88,0xff,0x5c,0xa3]
+@ CHECK: vrsra.u16	q6, q7, #16     @ encoding: [0x90,0xff,0x5e,0xc3]
+@ CHECK: vrsra.u32	q7, q8, #32     @ encoding: [0xa0,0xff,0x70,0xe3]
 @ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xc0,0xff,0xf2,0x03]
-	vrsra.u64	q8, q9, #64
-@ CHECK: vsli.8	d17, d16, #7            @ encoding: [0xcf,0xff,0x30,0x15]
-	vsli.8	d17, d16, #7
-@ CHECK: vsli.16	d17, d16, #15           @ encoding: [0xdf,0xff,0x30,0x15]
-	vsli.16	d17, d16, #15
-@ CHECK: vsli.32	d17, d16, #31           @ encoding: [0xff,0xff,0x30,0x15]
-	vsli.32	d17, d16, #31
-@ CHECK: vsli.64	d17, d16, #63           @ encoding: [0xff,0xff,0xb0,0x15]
-	vsli.64	d17, d16, #63
-@ CHECK: vsli.8	q9, q8, #7              @ encoding: [0xcf,0xff,0x70,0x25]
-	vsli.8	q9, q8, #7
-@ CHECK: vsli.16	q9, q8, #15             @ encoding: [0xdf,0xff,0x70,0x25]
-	vsli.16	q9, q8, #15
-@ CHECK: vsli.32	q9, q8, #31             @ encoding: [0xff,0xff,0x70,0x25]
-	vsli.32	q9, q8, #31
-@ CHECK: vsli.64	q9, q8, #63             @ encoding: [0xff,0xff,0xf0,0x25]
-	vsli.64	q9, q8, #63
-@ CHECK: vsri.8	d17, d16, #8            @ encoding: [0xc8,0xff,0x30,0x14]
-	vsri.8	d17, d16, #8
-@ CHECK: vsri.16	d17, d16, #16           @ encoding: [0xd0,0xff,0x30,0x14]
-	vsri.16	d17, d16, #16
-@ CHECK: vsri.32	d17, d16, #32           @ encoding: [0xe0,0xff,0x30,0x14]
-	vsri.32	d17, d16, #32
-@ CHECK: vsri.64	d17, d16, #64           @ encoding: [0xc0,0xff,0xb0,0x14]
-	vsri.64	d17, d16, #64
-@ CHECK: vsri.8	q9, q8, #8              @ encoding: [0xc8,0xff,0x70,0x24]
-	vsri.8	q9, q8, #8
-@ CHECK: vsri.16	q9, q8, #16             @ encoding: [0xd0,0xff,0x70,0x24]
-	vsri.16	q9, q8, #16
-@ CHECK: vsri.32	q9, q8, #32             @ encoding: [0xe0,0xff,0x70,0x24]
-	vsri.32	q9, q8, #32
-@ CHECK: vsri.64	q9, q8, #64             @ encoding: [0xc0,0xff,0xf0,0x24]
-	vsri.64	q9, q8, #64
+
+@ CHECK: vrsra.s8	d26, d26, #8    @ encoding: [0xc8,0xef,0x3a,0xa3]
+@ CHECK: vrsra.s16	d25, d25, #16   @ encoding: [0xd0,0xef,0x39,0x93]
+@ CHECK: vrsra.s32	d24, d24, #32   @ encoding: [0xe0,0xef,0x38,0x83]
+@ CHECK: vrsra.s64	d23, d23, #64   @ encoding: [0xc0,0xef,0xb7,0x73]
+@ CHECK: vrsra.u8	d22, d22, #8    @ encoding: [0xc8,0xff,0x36,0x63]
+@ CHECK: vrsra.u16	d21, d21, #16   @ encoding: [0xd0,0xff,0x35,0x53]
+@ CHECK: vrsra.u32	d20, d20, #32   @ encoding: [0xe0,0xff,0x34,0x43]
+@ CHECK: vrsra.u64	d19, d19, #64   @ encoding: [0xc0,0xff,0xb3,0x33]
+@ CHECK: vrsra.s8	q2, q2, #8      @ encoding: [0x88,0xef,0x54,0x43]
+@ CHECK: vrsra.s16	q3, q3, #16     @ encoding: [0x90,0xef,0x56,0x63]
+@ CHECK: vrsra.s32	q4, q4, #32     @ encoding: [0xa0,0xef,0x58,0x83]
+@ CHECK: vrsra.s64	q5, q5, #64     @ encoding: [0x80,0xef,0xda,0xa3]
+@ CHECK: vrsra.u8	q6, q6, #8      @ encoding: [0x88,0xff,0x5c,0xc3]
+@ CHECK: vrsra.u16	q7, q7, #16     @ encoding: [0x90,0xff,0x5e,0xe3]
+@ CHECK: vrsra.u32	q8, q8, #32     @ encoding: [0xe0,0xff,0x70,0x03]
+@ CHECK: vrsra.u64	q9, q9, #64     @ encoding: [0xc0,0xff,0xf2,0x23]
+
+
+	vsli.8 d11, d12, #7
+	vsli.16 d12, d13, #15
+	vsli.32 d13, d14, #31
+	vsli.64 d14, d15, #63
+	vsli.8 q1, q8, #7
+	vsli.16 q2, q7, #15
+	vsli.32 q3, q4, #31
+	vsli.64 q4, q5, #63
+	vsri.8 d28, d11, #8
+	vsri.16 d26, d12, #16
+	vsri.32 d24, d13, #32
+	vsri.64 d21, d14, #64
+	vsri.8 q1, q8, #8
+	vsri.16 q5, q2, #16
+	vsri.32 q7, q4, #32
+	vsri.64 q9, q6, #64
+
+        @ Two-operand syntax variant.
+	vsli.8 d12, #7
+	vsli.16 d13, #15
+	vsli.32 d14, #31
+	vsli.64 d15, #63
+	vsli.8 q8, #7
+	vsli.16 q7, #15
+	vsli.32 q4, #31
+	vsli.64 q5, #63
+	vsri.8 d11, #8
+	vsri.16 d12, #16
+	vsri.32 d13, #32
+	vsri.64 d14, #64
+	vsri.8 q8, #8
+	vsri.16 q2, #16
+	vsri.32 q4, #32
+	vsri.64 q6, #64
+
+@ CHECK: vsli.8	d11, d12, #7            @ encoding: [0x8f,0xff,0x1c,0xb5]
+@ CHECK: vsli.16	d12, d13, #15   @ encoding: [0x9f,0xff,0x1d,0xc5]
+@ CHECK: vsli.32	d13, d14, #31   @ encoding: [0xbf,0xff,0x1e,0xd5]
+@ CHECK: vsli.64	d14, d15, #63   @ encoding: [0xbf,0xff,0x9f,0xe5]
+@ CHECK: vsli.8	q1, q8, #7              @ encoding: [0x8f,0xff,0x70,0x25]
+@ CHECK: vsli.16	q2, q7, #15     @ encoding: [0x9f,0xff,0x5e,0x45]
+@ CHECK: vsli.32	q3, q4, #31     @ encoding: [0xbf,0xff,0x58,0x65]
+@ CHECK: vsli.64	q4, q5, #63     @ encoding: [0xbf,0xff,0xda,0x85]
+@ CHECK: vsri.8	d28, d11, #8            @ encoding: [0xc8,0xff,0x1b,0xc4]
+@ CHECK: vsri.16	d26, d12, #16   @ encoding: [0xd0,0xff,0x1c,0xa4]
+@ CHECK: vsri.32	d24, d13, #32   @ encoding: [0xe0,0xff,0x1d,0x84]
+@ CHECK: vsri.64	d21, d14, #64   @ encoding: [0xc0,0xff,0x9e,0x54]
+@ CHECK: vsri.8	q1, q8, #8              @ encoding: [0x88,0xff,0x70,0x24]
+@ CHECK: vsri.16	q5, q2, #16     @ encoding: [0x90,0xff,0x54,0xa4]
+@ CHECK: vsri.32	q7, q4, #32     @ encoding: [0xa0,0xff,0x58,0xe4]
+@ CHECK: vsri.64	q9, q6, #64     @ encoding: [0xc0,0xff,0xdc,0x24]
+
+@ CHECK: vsli.8	d12, d12, #7            @ encoding: [0x8f,0xff,0x1c,0xc5]
+@ CHECK: vsli.16	d13, d13, #15           @ encoding: [0x9f,0xff,0x1d,0xd5]
+@ CHECK: vsli.32	d14, d14, #31           @ encoding: [0xbf,0xff,0x1e,0xe5]
+@ CHECK: vsli.64	d15, d15, #63           @ encoding: [0xbf,0xff,0x9f,0xf5]
+@ CHECK: vsli.8	q8, q8, #7              @ encoding: [0xcf,0xff,0x70,0x05]
+@ CHECK: vsli.16	q7, q7, #15             @ encoding: [0x9f,0xff,0x5e,0xe5]
+@ CHECK: vsli.32	q4, q4, #31             @ encoding: [0xbf,0xff,0x58,0x85]
+@ CHECK: vsli.64	q5, q5, #63             @ encoding: [0xbf,0xff,0xda,0xa5]
+@ CHECK: vsri.8	d11, d11, #8            @ encoding: [0x88,0xff,0x1b,0xb4]
+@ CHECK: vsri.16	d12, d12, #16           @ encoding: [0x90,0xff,0x1c,0xc4]
+@ CHECK: vsri.32	d13, d13, #32           @ encoding: [0xa0,0xff,0x1d,0xd4]
+@ CHECK: vsri.64	d14, d14, #64           @ encoding: [0x80,0xff,0x9e,0xe4]
+@ CHECK: vsri.8	q8, q8, #8              @ encoding: [0xc8,0xff,0x70,0x04]
+@ CHECK: vsri.16	q2, q2, #16             @ encoding: [0x90,0xff,0x54,0x44]
+@ CHECK: vsri.32	q4, q4, #32             @ encoding: [0xa0,0xff,0x58,0x84]
+@ CHECK: vsri.64	q6, q6, #64             @ encoding: [0x80,0xff,0xdc,0xc4]
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index b592f1e..2a22620 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -1,124 +1,121 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
 
+        vadd.f64  d16, d17, d16
+        vadd.f32  s0, s1, s0
 @ CHECK: vadd.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x71,0xee]
-        vadd.f64        d16, d17, d16
-
 @ CHECK: vadd.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x30,0xee]
-        vadd.f32        s0, s1, s0
 
+        vsub.f64  d16, d17, d16
+        vsub.f32  s0, s1, s0
 @ CHECK: vsub.f64 d16, d17, d16      @ encoding: [0xe0,0x0b,0x71,0xee]
-        vsub.f64        d16, d17, d16
-
 @ CHECK: vsub.f32 s0, s1, s0         @ encoding: [0xc0,0x0a,0x30,0xee]
-        vsub.f32        s0, s1, s0
 
-@ CHECK: vdiv.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0xc1,0xee]
-        vdiv.f64        d16, d17, d16
+        vdiv.f64  d16, d17, d16
+        vdiv.f32  s0, s1, s0
+        vdiv.f32 s5, s7
+        vdiv.f64 d5, d7
 
-@ CHECK: vdiv.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x80,0xee]
-        vdiv.f32        s0, s1, s0
+@ CHECK: vdiv.f64 d16, d17, d16         @ encoding: [0xa0,0x0b,0xc1,0xee]
+@ CHECK: vdiv.f32 s0, s1, s0            @ encoding: [0x80,0x0a,0x80,0xee]
+@ CHECK: vdiv.f32	s5, s5, s7      @ encoding: [0xa3,0x2a,0xc2,0xee]
+@ CHECK: vdiv.f64	d5, d5, d7      @ encoding: [0x07,0x5b,0x85,0xee]
 
-@ CHECK: vmul.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x61,0xee]
-        vmul.f64        d16, d17, d16
 
-@ CHECK: vmul.f64	d20, d20, d17   @ encoding: [0xa1,0x4b,0x64,0xee]
+        vmul.f64  d16, d17, d16
 	vmul.f64  d20, d17
+        vmul.f32  s0, s1, s0
+	vmul.f32  s11, s21
 
-@ CHECK: vmul.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x20,0xee]
-        vmul.f32        s0, s1, s0
 
-@ CHECK: vmul.f32	s11, s11, s21   @ encoding: [0xaa,0x5a,0x65,0xee]
-	vmul.f32  s11, s21
+@ CHECK: vmul.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x61,0xee]
+@ CHECK: vmul.f64 d20, d20, d17      @ encoding: [0xa1,0x4b,0x64,0xee]
+@ CHECK: vmul.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x20,0xee]
+@ CHECK: vmul.f32 s11, s11, s21      @ encoding: [0xaa,0x5a,0x65,0xee]
 
-@ CHECK: vnmul.f64 d16, d17, d16     @ encoding: [0xe0,0x0b,0x61,0xee]
         vnmul.f64       d16, d17, d16
+        vnmul.f32       s0, s1, s0
 
+@ CHECK: vnmul.f64 d16, d17, d16     @ encoding: [0xe0,0x0b,0x61,0xee]
 @ CHECK: vnmul.f32 s0, s1, s0        @ encoding: [0xc0,0x0a,0x20,0xee]
-        vnmul.f32       s0, s1, s0
 
-@ CHECK: vcmpe.f64 d17, d16          @ encoding: [0xe0,0x1b,0xf4,0xee]
         vcmpe.f64       d17, d16
+        vcmpe.f32       s1, s0
 
+@ CHECK: vcmpe.f64 d17, d16          @ encoding: [0xe0,0x1b,0xf4,0xee]
 @ CHECK: vcmpe.f32 s1, s0            @ encoding: [0xc0,0x0a,0xf4,0xee]
-        vcmpe.f32       s1, s0
 
-@ CHECK: vcmpe.f64 d16, #0           @ encoding: [0xc0,0x0b,0xf5,0xee]
         vcmpe.f64       d16, #0
+        vcmpe.f32       s0, #0
 
+@ CHECK: vcmpe.f64 d16, #0           @ encoding: [0xc0,0x0b,0xf5,0xee]
 @ CHECK: vcmpe.f32 s0, #0            @ encoding: [0xc0,0x0a,0xb5,0xee]
-        vcmpe.f32       s0, #0
 
-@ CHECK: vabs.f64 d16, d16           @ encoding: [0xe0,0x0b,0xf0,0xee]
         vabs.f64        d16, d16
+        vabs.f32        s0, s0
 
+@ CHECK: vabs.f64 d16, d16           @ encoding: [0xe0,0x0b,0xf0,0xee]
 @ CHECK: vabs.f32 s0, s0             @ encoding: [0xc0,0x0a,0xb0,0xee]
-        vabs.f32        s0, s0
 
-@ CHECK: vcvt.f32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xb7,0xee]
         vcvt.f32.f64    s0, d16
+        vcvt.f64.f32    d16, s0
 
+@ CHECK: vcvt.f32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xb7,0xee]
 @ CHECK: vcvt.f64.f32 d16, s0        @ encoding: [0xc0,0x0a,0xf7,0xee]
-        vcvt.f64.f32    d16, s0
 
-@ CHECK: vneg.f64 d16, d16           @ encoding: [0x60,0x0b,0xf1,0xee]
         vneg.f64        d16, d16
+        vneg.f32        s0, s0
 
+@ CHECK: vneg.f64 d16, d16           @ encoding: [0x60,0x0b,0xf1,0xee]
 @ CHECK: vneg.f32 s0, s0             @ encoding: [0x40,0x0a,0xb1,0xee]
-        vneg.f32        s0, s0
 
-@ CHECK: vsqrt.f64 d16, d16          @ encoding: [0xe0,0x0b,0xf1,0xee]
         vsqrt.f64       d16, d16
+        vsqrt.f32       s0, s0
 
+@ CHECK: vsqrt.f64 d16, d16          @ encoding: [0xe0,0x0b,0xf1,0xee]
 @ CHECK: vsqrt.f32 s0, s0            @ encoding: [0xc0,0x0a,0xb1,0xee]
-        vsqrt.f32       s0, s0
 
-@ CHECK: vcvt.f64.s32 d16, s0        @ encoding: [0xc0,0x0b,0xf8,0xee]
         vcvt.f64.s32    d16, s0
-
-@ CHECK: vcvt.f32.s32 s0, s0         @ encoding: [0xc0,0x0a,0xb8,0xee]
         vcvt.f32.s32    s0, s0
-
-@ CHECK: vcvt.f64.u32 d16, s0        @ encoding: [0x40,0x0b,0xf8,0xee]
         vcvt.f64.u32    d16, s0
-
-@ CHECK: vcvt.f32.u32 s0, s0         @ encoding: [0x40,0x0a,0xb8,0xee]
         vcvt.f32.u32    s0, s0
-
-@ CHECK: vcvt.s32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbd,0xee]
         vcvt.s32.f64    s0, d16
-
-@ CHECK: vcvt.s32.f32 s0, s0         @ encoding: [0xc0,0x0a,0xbd,0xee]
         vcvt.s32.f32    s0, s0
-
-@ CHECK: vcvt.u32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbc,0xee]
         vcvt.u32.f64    s0, d16
+        vcvt.u32.f32    s0, s0
 
+@ CHECK: vcvt.f64.s32 d16, s0        @ encoding: [0xc0,0x0b,0xf8,0xee]
+@ CHECK: vcvt.f32.s32 s0, s0         @ encoding: [0xc0,0x0a,0xb8,0xee]
+@ CHECK: vcvt.f64.u32 d16, s0        @ encoding: [0x40,0x0b,0xf8,0xee]
+@ CHECK: vcvt.f32.u32 s0, s0         @ encoding: [0x40,0x0a,0xb8,0xee]
+@ CHECK: vcvt.s32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbd,0xee]
+@ CHECK: vcvt.s32.f32 s0, s0         @ encoding: [0xc0,0x0a,0xbd,0xee]
+@ CHECK: vcvt.u32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbc,0xee]
 @ CHECK: vcvt.u32.f32 s0, s0         @ encoding: [0xc0,0x0a,0xbc,0xee]
-        vcvt.u32.f32    s0, s0
 
-@ CHECK: vmla.f64 d16, d18, d17      @ encoding: [0xa1,0x0b,0x42,0xee]
+
         vmla.f64        d16, d18, d17
+        vmla.f32        s1, s2, s0
 
+@ CHECK: vmla.f64 d16, d18, d17      @ encoding: [0xa1,0x0b,0x42,0xee]
 @ CHECK: vmla.f32 s1, s2, s0         @ encoding: [0x00,0x0a,0x41,0xee]
-        vmla.f32        s1, s2, s0
 
-@ CHECK: vmls.f64 d16, d18, d17      @ encoding: [0xe1,0x0b,0x42,0xee]
         vmls.f64        d16, d18, d17
+        vmls.f32        s1, s2, s0
 
+@ CHECK: vmls.f64 d16, d18, d17      @ encoding: [0xe1,0x0b,0x42,0xee]
 @ CHECK: vmls.f32 s1, s2, s0         @ encoding: [0x40,0x0a,0x41,0xee]
-        vmls.f32        s1, s2, s0
 
-@ CHECK: vnmla.f64 d16, d18, d17     @ encoding: [0xe1,0x0b,0x52,0xee]
         vnmla.f64       d16, d18, d17
+        vnmla.f32       s1, s2, s0
 
+@ CHECK: vnmla.f64 d16, d18, d17     @ encoding: [0xe1,0x0b,0x52,0xee]
 @ CHECK: vnmla.f32 s1, s2, s0        @ encoding: [0x40,0x0a,0x51,0xee]
-        vnmla.f32       s1, s2, s0
 
-@ CHECK: vnmls.f64 d16, d18, d17     @ encoding: [0xa1,0x0b,0x52,0xee]
         vnmls.f64       d16, d18, d17
+        vnmls.f32       s1, s2, s0
 
+@ CHECK: vnmls.f64 d16, d18, d17     @ encoding: [0xa1,0x0b,0x52,0xee]
 @ CHECK: vnmls.f32 s1, s2, s0        @ encoding: [0x00,0x0a,0x51,0xee]
-        vnmls.f32       s1, s2, s0
 
         vmrs    APSR_nzcv, fpscr
         vmrs    apsr_nzcv, fpscr
@@ -199,6 +196,27 @@
 @ CHECK: vmov r0, r1, d16            @ encoding: [0x30,0x0b,0x51,0xec]
         vmov    r0, r1, d16
 
+@ Between two single precision registers and two core registers
+        vmov s3, s4, r1, r2
+        vmov s2, s3, r1, r2
+        vmov r1, r2, s3, s4
+        vmov r1, r2, s2, s3
+@ CHECK: vmov s3, s4, r1, r2      @ encoding: [0x31,0x1a,0x42,0xec]
+@ CHECK: vmov s2, s3, r1, r2      @ encoding: [0x11,0x1a,0x42,0xec]
+@ CHECK: vmov r1, r2, s3, s4      @ encoding: [0x31,0x1a,0x52,0xec]
+@ CHECK: vmov r1, r2, s2, s3      @ encoding: [0x11,0x1a,0x52,0xec]
+
+@ Between one double precision register and two core registers
+        vmov d15, r1, r2 
+        vmov d16, r1, r2
+        vmov r1, r2, d15
+        vmov r1, r2, d16
+@ CHECK: vmov d15, r1, r2         @ encoding: [0x1f,0x1b,0x42,0xec]
+@ CHECK: vmov d16, r1, r2         @ encoding: [0x30,0x1b,0x42,0xec]
+@ CHECK: vmov r1, r2, d15         @ encoding: [0x1f,0x1b,0x52,0xec]
+@ CHECK: vmov r1, r2, d16         @ encoding: [0x30,0x1b,0x52,0xec]
+
+
 @ CHECK: vldr d17, [r0]           @ encoding: [0x00,0x1b,0xd0,0xed]
 @ CHECK: vldr s0, [lr]            @ encoding: [0x00,0x0a,0x9e,0xed]
 @ CHECK: vldr d0, [lr]            @ encoding: [0x00,0x0b,0x9e,0xed]
diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s
index 99d7e38..6f822d1 100644
--- a/test/MC/ARM/thumb-diagnostics.s
+++ b/test/MC/ARM/thumb-diagnostics.s
@@ -67,7 +67,7 @@ error: invalid operand for instruction
 @ Invalid writeback and register lists for STM
         stm r1, {r2, r6}
         stm r1!, {r2, r9}
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         stm r1, {r2, r6}
 @ CHECK-ERRORS:         ^
 @ CHECK-ERRORS: error: registers must be in range r0-r7
@@ -95,13 +95,13 @@ error: invalid operand for instruction
         str r2, [r7, #-1]
         str r5, [r1, #3]
         str r3, [r7, #128]
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         str r2, [r7, #-1]
 @ CHECK-ERRORS:         ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         str r5, [r1, #3]
 @ CHECK-ERRORS:         ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         str r3, [r7, #128]
 @ CHECK-ERRORS:         ^
 
@@ -111,7 +111,7 @@ error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS:         svc #-1
 @ CHECK-ERRORS:             ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: arm-mode
 @ CHECK-ERRORS:         svc #256
 @ CHECK-ERRORS:         ^
 
@@ -121,15 +121,38 @@ error: invalid operand for instruction
         add sp, #3
         add sp, sp, #512
         add r2, sp, #1024
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         add sp, #-1
 @ CHECK-ERRORS:                 ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         add sp, #3
 @ CHECK-ERRORS:                 ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         add sp, sp, #512
 @ CHECK-ERRORS:                     ^
-@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
+@ CHECK-ERRORS: error: instruction requires: arm-mode
 @ CHECK-ERRORS:         add r2, sp, #1024
 @ CHECK-ERRORS:         ^
+
+        add r2, sp, ip
+@ CHECK-ERRORS: error: source register must be the same as destination
+@ CHECK-ERRORS:         add r2, sp, ip
+@ CHECK-ERRORS:                     ^
+ 
+@------------------------------------------------------------------------------
+@ WFE/WFI/YIELD - are not supported pre v6T2
+@------------------------------------------------------------------------------
+        wfe
+        wfi
+        yield
+
+@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: wfe
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: wfi
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: yield
+@ CHECK-ERRORS: ^
+
diff --git a/test/MC/ARM/thumb.s b/test/MC/ARM/thumb.s
index 625882c..2223bdc 100644
--- a/test/MC/ARM/thumb.s
+++ b/test/MC/ARM/thumb.s
@@ -44,13 +44,6 @@
         nop
 @ CHECK: nop @ encoding: [0xc0,0x46]
 
-        wfe
-        wfi
-        yield
-@ CHECK: wfe                            @ encoding: [0x20,0xbf]
-@ CHECK: wfi                            @ encoding: [0x30,0xbf]
-@ CHECK: yield                          @ encoding: [0x10,0xbf]
-
         cpsie aif
 @ CHECK: cpsie aif                      @ encoding: [0x67,0xb6]
 
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index e38f53c..d94c686 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -40,5 +40,5 @@
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
diff --git a/test/MC/ARM/thumb2-mclass.s b/test/MC/ARM/thumb2-mclass.s
index 10460f9..b7af723 100644
--- a/test/MC/ARM/thumb2-mclass.s
+++ b/test/MC/ARM/thumb2-mclass.s
@@ -44,9 +44,21 @@
 @------------------------------------------------------------------------------
 
         msr  apsr, r0
+        msr  apsr_nzcvq, r0
+        msr  apsr_g, r0
+        msr  apsr_nzcvqg, r0
         msr  iapsr, r0
+        msr  iapsr_nzcvq, r0
+        msr  iapsr_g, r0
+        msr  iapsr_nzcvqg, r0
         msr  eapsr, r0
+        msr  eapsr_nzcvq, r0
+        msr  eapsr_g, r0
+        msr  eapsr_nzcvqg, r0
         msr  xpsr, r0
+        msr  xpsr_nzcvq, r0
+        msr  xpsr_g, r0
+        msr  xpsr_nzcvqg, r0
         msr  ipsr, r0
         msr  epsr, r0
         msr  iepsr, r0
@@ -58,17 +70,29 @@
         msr  faultmask, r0
         msr  control, r0
 
-@ CHECK: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x80]
-@ CHECK: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x80]
-@ CHECK: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x80]
-@ CHECK: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x80]
-@ CHECK: msr	ipsr, r0                @ encoding: [0x80,0xf3,0x05,0x80]
-@ CHECK: msr	epsr, r0                @ encoding: [0x80,0xf3,0x06,0x80]
-@ CHECK: msr	iepsr, r0               @ encoding: [0x80,0xf3,0x07,0x80]
-@ CHECK: msr	msp, r0                 @ encoding: [0x80,0xf3,0x08,0x80]
-@ CHECK: msr	psp, r0                 @ encoding: [0x80,0xf3,0x09,0x80]
-@ CHECK: msr	primask, r0             @ encoding: [0x80,0xf3,0x10,0x80]
-@ CHECK: msr	basepri, r0             @ encoding: [0x80,0xf3,0x11,0x80]
-@ CHECK: msr	basepri_max, r0         @ encoding: [0x80,0xf3,0x12,0x80]
-@ CHECK: msr	faultmask, r0           @ encoding: [0x80,0xf3,0x13,0x80]
-@ CHECK: msr	control, r0             @ encoding: [0x80,0xf3,0x14,0x80]
+@ CHECK: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK: msr	apsr_g, r0              @ encoding: [0x80,0xf3,0x00,0x84]
+@ CHECK: msr	apsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x00,0x8c]
+@ CHECK: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK: msr	iapsr_g, r0             @ encoding: [0x80,0xf3,0x01,0x84]
+@ CHECK: msr	iapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x01,0x8c]
+@ CHECK: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK: msr	eapsr_g, r0             @ encoding: [0x80,0xf3,0x02,0x84]
+@ CHECK: msr	eapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x02,0x8c]
+@ CHECK: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
+@ CHECK: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
+@ CHECK: msr	xpsr_g, r0              @ encoding: [0x80,0xf3,0x03,0x84]
+@ CHECK: msr	xpsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x03,0x8c]
+@ CHECK: msr	ipsr, r0                @ encoding: [0x80,0xf3,0x05,0x88]
+@ CHECK: msr	epsr, r0                @ encoding: [0x80,0xf3,0x06,0x88]
+@ CHECK: msr	iepsr, r0               @ encoding: [0x80,0xf3,0x07,0x88]
+@ CHECK: msr	msp, r0                 @ encoding: [0x80,0xf3,0x08,0x88]
+@ CHECK: msr	psp, r0                 @ encoding: [0x80,0xf3,0x09,0x88]
+@ CHECK: msr	primask, r0             @ encoding: [0x80,0xf3,0x10,0x88]
+@ CHECK: msr	basepri, r0             @ encoding: [0x80,0xf3,0x11,0x88]
+@ CHECK: msr	basepri_max, r0         @ encoding: [0x80,0xf3,0x12,0x88]
+@ CHECK: msr	faultmask, r0           @ encoding: [0x80,0xf3,0x13,0x88]
+@ CHECK: msr	control, r0             @ encoding: [0x80,0xf3,0x14,0x88]
diff --git a/test/MC/ARM/thumb2-narrow-dp.ll b/test/MC/ARM/thumb2-narrow-dp.ll
new file mode 100644
index 0000000..ae2ba35
--- /dev/null
+++ b/test/MC/ARM/thumb2-narrow-dp.ll
@@ -0,0 +1,807 @@
+// RUN: llvm-mc -triple thumbv7 -show-encoding < %s | FileCheck %s
+
+// Test each of the Thumb1 data-processing instructions
+// The assembly syntax for these instructions allows an optional Rd register
+//   OP{S}{<c>}{<q>}  {<Rd>,} <Rn>, <Rm>
+// Assemblers should chose the narrow thumb encoding when possible, i.e.
+//   - Rd == Rn 
+//   - Rd, Rn and Rm are < r8
+// In addition, some operations are commutative, allowing the transormation 
+// when:
+//   - Rd == Rn || Rd == Rm
+//   - Rd, Rn and Rm are < r8
+
+// AND (commutative)
+    ANDS     r0, r2, r1          // Must be wide - 3 distinct registers
+    ANDS     r2, r2, r1          // Should choose narrow
+    ANDS     r2, r1, r2          // Should choose narrow - commutative
+    ANDS.W   r0, r0, r1          // Explicitly wide
+    ANDS.W   r3, r1, r3  
+    AND      r0, r1, r0          // Must use wide encoding as not flag-setting
+    ANDS     r7, r7, r1          // Should use narrow
+    ANDS     r7, r1, r7          // Commutative
+    ANDS     r8, r1, r8          // high registers so must use wide encoding
+    ANDS     r8, r8, r1
+    ANDS     r0, r8, r0
+    ANDS     r1, r1, r8
+    ANDS     r2, r2, r1, lsl #1  // Must use wide - shifted register
+    ANDS     r0, r1, r0, lsr #1
+// CHECK: ands.w  r0, r2, r1              @ encoding: [0x12,0xea,0x01,0x00]
+// CHECK: ands    r2, r1                  @ encoding: [0x0a,0x40]
+// CHECK: ands    r2, r1                  @ encoding: [0x0a,0x40]
+// CHECK: ands.w  r0, r0, r1              @ encoding: [0x10,0xea,0x01,0x00]
+// CHECK: ands.w  r3, r1, r3              @ encoding: [0x11,0xea,0x03,0x03]
+// CHECK: and.w   r0, r1, r0              @ encoding: [0x01,0xea,0x00,0x00]
+// CHECK: ands    r7, r1                  @ encoding: [0x0f,0x40]
+// CHECK: ands    r7, r1                  @ encoding: [0x0f,0x40]
+// CHECK: ands.w  r8, r1, r8              @ encoding: [0x11,0xea,0x08,0x08]
+// CHECK: ands.w  r8, r8, r1              @ encoding: [0x18,0xea,0x01,0x08]
+// CHECK: ands.w  r0, r8, r0              @ encoding: [0x18,0xea,0x00,0x00]
+// CHECK: ands.w  r1, r1, r8              @ encoding: [0x11,0xea,0x08,0x01]
+// CHECK: ands.w  r2, r2, r1, lsl #1      @ encoding: [0x12,0xea,0x41,0x02]
+// CHECK: ands.w  r0, r1, r0, lsr #1      @ encoding: [0x11,0xea,0x50,0x00]
+
+    IT EQ
+    ANDEQ    r0, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    ANDEQ    r3, r3, r1          // Should choose narrow
+    IT EQ
+    ANDEQ    r3, r1, r3          // Should choose narrow - commutative
+    IT EQ
+    ANDEQ.W  r0, r0, r1          // Explicitly wide
+    IT EQ
+    ANDEQ.W  r2, r1, r2  
+    IT EQ
+    ANDSEQ   r0, r1, r0          // Must use wide encoding as flag-setting
+    IT EQ
+    ANDEQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    ANDEQ    r7, r1, r7          // Commutative
+    IT EQ
+    ANDEQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    ANDEQ    r8, r8, r1
+    IT EQ
+    ANDEQ    r4, r8, r4
+    IT EQ
+    ANDEQ    r4, r4, r8
+    IT EQ
+    ANDEQ    r0, r0, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    ANDEQ    r5, r1, r5, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r0, r2, r1             @ encoding: [0x02,0xea,0x01,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq    r3, r1                 @ encoding: [0x0b,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq    r3, r1                 @ encoding: [0x0b,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r0, r0, r1             @ encoding: [0x00,0xea,0x01,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r2, r1, r2             @ encoding: [0x01,0xea,0x02,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andseq.w r0, r1, r0             @ encoding: [0x11,0xea,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq    r7, r1                 @ encoding: [0x0f,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq    r7, r1                 @ encoding: [0x0f,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r8, r1, r8             @ encoding: [0x01,0xea,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r8, r8, r1             @ encoding: [0x08,0xea,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r4, r8, r4             @ encoding: [0x08,0xea,0x04,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r4, r4, r8             @ encoding: [0x04,0xea,0x08,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r0, r0, r1, lsl #1     @ encoding: [0x00,0xea,0x41,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: andeq.w  r5, r1, r5, lsr #1     @ encoding: [0x01,0xea,0x55,0x05]
+
+// EOR (commutative)
+    EORS     r0, r2, r1          // Must be wide - 3 distinct registers
+    EORS     r5, r5, r1          // Should choose narrow
+    EORS     r5, r1, r5          // Should choose narrow - commutative
+    EORS.W   r0, r0, r1          // Explicitly wide
+    EORS.W   r2, r1, r2  
+    EOR      r1, r1, r1          // Must use wide encoding as not flag-setting
+    EORS     r7, r7, r1          // Should use narrow
+    EORS     r7, r1, r7          // Commutative
+    EORS     r8, r1, r8          // high registers so must use wide encoding
+    EORS     r8, r8, r1
+    EORS     r6, r8, r6
+    EORS     r0, r0, r8
+    EORS     r2, r2, r1, lsl #1  // Must use wide - shifted register
+    EORS     r0, r1, r0, lsr #1
+// CHECK: eors.w  r0, r2, r1              @ encoding: [0x92,0xea,0x01,0x00]
+// CHECK: eors    r5, r1                  @ encoding: [0x4d,0x40]
+// CHECK: eors    r5, r1                  @ encoding: [0x4d,0x40]
+// CHECK: eors.w  r0, r0, r1              @ encoding: [0x90,0xea,0x01,0x00]
+// CHECK: eors.w  r2, r1, r2              @ encoding: [0x91,0xea,0x02,0x02]
+// CHECK: eor.w   r1, r1, r1              @ encoding: [0x81,0xea,0x01,0x01]
+// CHECK: eors    r7, r1                  @ encoding: [0x4f,0x40]
+// CHECK: eors    r7, r1                  @ encoding: [0x4f,0x40]
+// CHECK: eors.w  r8, r1, r8              @ encoding: [0x91,0xea,0x08,0x08]
+// CHECK: eors.w  r8, r8, r1              @ encoding: [0x98,0xea,0x01,0x08]
+// CHECK: eors.w  r6, r8, r6              @ encoding: [0x98,0xea,0x06,0x06]
+// CHECK: eors.w  r0, r0, r8              @ encoding: [0x90,0xea,0x08,0x00]
+// CHECK: eors.w  r2, r2, r1, lsl #1      @ encoding: [0x92,0xea,0x41,0x02]
+// CHECK: eors.w  r0, r1, r0, lsr #1      @ encoding: [0x91,0xea,0x50,0x00]
+
+    IT EQ
+    EOREQ    r3, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    EOREQ    r0, r0, r1          // Should choose narrow
+    IT EQ
+    EOREQ    r2, r1, r2          // Should choose narrow - commutative
+    IT EQ
+    EOREQ.W  r3, r3, r1          // Explicitly wide
+    IT EQ
+    EOREQ.W  r0, r1, r0  
+    IT EQ
+    EORSEQ   r1, r1, r1          // Must use wide encoding as flag-setting
+    IT EQ
+    EOREQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    EOREQ    r7, r1, r7          // Commutative
+    IT EQ
+    EOREQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    EOREQ    r8, r8, r1
+    IT EQ
+    EOREQ    r0, r8, r0
+    IT EQ
+    EOREQ    r3, r3, r8
+    IT EQ
+    EOREQ    r4, r4, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    EOREQ    r0, r1, r0, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r3, r2, r1             @ encoding: [0x82,0xea,0x01,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq    r0, r1                 @ encoding: [0x48,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq    r2, r1                 @ encoding: [0x4a,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r3, r3, r1             @ encoding: [0x83,0xea,0x01,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r0, r1, r0             @ encoding: [0x81,0xea,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eorseq.w r1, r1, r1             @ encoding: [0x91,0xea,0x01,0x01]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq    r7, r1                 @ encoding: [0x4f,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq    r7, r1                 @ encoding: [0x4f,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r8, r1, r8             @ encoding: [0x81,0xea,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r8, r8, r1             @ encoding: [0x88,0xea,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r0, r8, r0             @ encoding: [0x88,0xea,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r3, r3, r8             @ encoding: [0x83,0xea,0x08,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r4, r4, r1, lsl #1     @ encoding: [0x84,0xea,0x41,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: eoreq.w  r0, r1, r0, lsr #1     @ encoding: [0x81,0xea,0x50,0x00]
+
+// LSL 
+    LSLS     r0, r2, r1          // Must be wide - 3 distinct registers
+    LSLS     r2, r2, r1          // Should choose narrow
+    LSLS     r2, r1, r2          // Should choose wide - not commutative
+    LSLS.W   r0, r0, r1          // Explicitly wide
+    LSLS.W   r4, r1, r4  
+    LSL      r4, r1, r4          // Must use wide encoding as not flag-setting
+    LSLS     r7, r7, r1          // Should use narrow
+    LSLS     r8, r1, r8          // high registers so must use wide encoding
+    LSLS     r8, r8, r1
+    LSLS     r3, r8, r3
+    LSLS     r5, r5, r8
+// CHECK: lsls.w  r0, r2, r1              @ encoding: [0x12,0xfa,0x01,0xf0]
+// CHECK: lsls    r2, r1                  @ encoding: [0x8a,0x40]
+// CHECK: lsls.w  r2, r1, r2              @ encoding: [0x11,0xfa,0x02,0xf2]
+// CHECK: lsls.w  r0, r0, r1              @ encoding: [0x10,0xfa,0x01,0xf0]
+// CHECK: lsls.w  r4, r1, r4              @ encoding: [0x11,0xfa,0x04,0xf4]
+// CHECK: lsl.w   r4, r1, r4              @ encoding: [0x01,0xfa,0x04,0xf4]
+// CHECK: lsls    r7, r1                  @ encoding: [0x8f,0x40]
+// CHECK: lsls.w  r8, r1, r8              @ encoding: [0x11,0xfa,0x08,0xf8]
+// CHECK: lsls.w  r8, r8, r1              @ encoding: [0x18,0xfa,0x01,0xf8]
+// CHECK: lsls.w  r3, r8, r3              @ encoding: [0x18,0xfa,0x03,0xf3]
+// CHECK: lsls.w  r5, r5, r8              @ encoding: [0x15,0xfa,0x08,0xf5]
+
+    IT EQ
+    LSLEQ    r0, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    LSLEQ    r2, r2, r1          // Should choose narrow
+    IT EQ
+    LSLEQ    r2, r1, r2          // Should choose wide - not commutative
+    IT EQ
+    LSLEQ.W  r0, r0, r1          // Explicitly wide
+    IT EQ
+    LSLEQ.W  r3, r1, r3  
+    IT EQ
+    LSLSEQ   r4, r1, r4          // Must use wide encoding as flag-setting
+    IT EQ
+    LSLEQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    LSLEQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    LSLEQ    r8, r8, r1
+    IT EQ
+    LSLEQ    r0, r8, r0
+    IT EQ
+    LSLEQ    r3, r3, r8
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r0, r2, r1             @ encoding: [0x02,0xfa,0x01,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq    r2, r1                 @ encoding: [0x8a,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r2, r1, r2             @ encoding: [0x01,0xfa,0x02,0xf2]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r0, r0, r1             @ encoding: [0x00,0xfa,0x01,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r3, r1, r3             @ encoding: [0x01,0xfa,0x03,0xf3]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lslseq.w r4, r1, r4             @ encoding: [0x11,0xfa,0x04,0xf4]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq    r7, r1                 @ encoding: [0x8f,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r8, r1, r8             @ encoding: [0x01,0xfa,0x08,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r8, r8, r1             @ encoding: [0x08,0xfa,0x01,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r0, r8, r0             @ encoding: [0x08,0xfa,0x00,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsleq.w  r3, r3, r8             @ encoding: [0x03,0xfa,0x08,0xf3]
+
+// LSR 
+    LSRS     r6, r2, r1          // Must be wide - 3 distinct registers
+    LSRS     r2, r2, r1          // Should choose narrow
+    LSRS     r2, r1, r2          // Should choose wide - not commutative
+    LSRS.W   r2, r2, r1          // Explicitly wide
+    LSRS.W   r3, r1, r3  
+    LSR      r4, r1, r4          // Must use wide encoding as not flag-setting
+    LSRS     r7, r7, r1          // Should use narrow
+    LSRS     r8, r1, r8          // high registers so must use wide encoding
+    LSRS     r8, r8, r1
+    LSRS     r2, r8, r2
+    LSRS     r5, r5, r8
+// CHECK: lsrs.w  r6, r2, r1              @ encoding: [0x32,0xfa,0x01,0xf6]
+// CHECK: lsrs    r2, r1                  @ encoding: [0xca,0x40]
+// CHECK: lsrs.w  r2, r1, r2              @ encoding: [0x31,0xfa,0x02,0xf2]
+// CHECK: lsrs.w  r2, r2, r1              @ encoding: [0x32,0xfa,0x01,0xf2]
+// CHECK: lsrs.w  r3, r1, r3              @ encoding: [0x31,0xfa,0x03,0xf3]
+// CHECK: lsr.w   r4, r1, r4              @ encoding: [0x21,0xfa,0x04,0xf4]
+// CHECK: lsrs    r7, r1                  @ encoding: [0xcf,0x40]
+// CHECK: lsrs.w  r8, r1, r8              @ encoding: [0x31,0xfa,0x08,0xf8]
+// CHECK: lsrs.w  r8, r8, r1              @ encoding: [0x38,0xfa,0x01,0xf8]
+// CHECK: lsrs.w  r2, r8, r2              @ encoding: [0x38,0xfa,0x02,0xf2]
+// CHECK: lsrs.w  r5, r5, r8              @ encoding: [0x35,0xfa,0x08,0xf5]
+
+    IT EQ
+    LSREQ    r6, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    LSREQ    r7, r7, r1          // Should choose narrow
+    IT EQ
+    LSREQ    r7, r1, r7          // Should choose wide - not commutative
+    IT EQ
+    LSREQ.W  r7, r7, r1          // Explicitly wide
+    IT EQ
+    LSREQ.W  r2, r1, r2  
+    IT EQ
+    LSRSEQ   r0, r1, r0          // Must use wide encoding as flag-setting
+    IT EQ
+    LSREQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    LSREQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    LSREQ    r8, r8, r1
+    IT EQ
+    LSREQ    r1, r8, r1
+    IT EQ
+    LSREQ    r4, r4, r8
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r6, r2, r1             @ encoding: [0x22,0xfa,0x01,0xf6]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq    r7, r1                 @ encoding: [0xcf,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r7, r1, r7             @ encoding: [0x21,0xfa,0x07,0xf7]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r7, r7, r1             @ encoding: [0x27,0xfa,0x01,0xf7]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r2, r1, r2             @ encoding: [0x21,0xfa,0x02,0xf2]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsrseq.w r0, r1, r0             @ encoding: [0x31,0xfa,0x00,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq    r7, r1                 @ encoding: [0xcf,0x40]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r8, r1, r8             @ encoding: [0x21,0xfa,0x08,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r8, r8, r1             @ encoding: [0x28,0xfa,0x01,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r1, r8, r1             @ encoding: [0x28,0xfa,0x01,0xf1]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: lsreq.w  r4, r4, r8             @ encoding: [0x24,0xfa,0x08,0xf4]
+
+// ASR 
+    ASRS     r7, r6, r5          // Must be wide - 3 distinct registers
+    ASRS     r0, r0, r1          // Should choose narrow
+    ASRS     r0, r1, r0          // Should choose wide - not commutative
+    ASRS.W   r3, r3, r1          // Explicitly wide
+    ASRS.W   r1, r1, r1  
+    ASR      r0, r1, r0          // Must use wide encoding as not flag-setting
+    ASRS     r7, r7, r1          // Should use narrow
+    ASRS     r8, r1, r8          // high registers so must use wide encoding
+    ASRS     r8, r8, r1
+    ASRS     r5, r8, r5
+    ASRS     r5, r5, r8
+// CHECK: asrs.w  r7, r6, r5              @ encoding: [0x56,0xfa,0x05,0xf7]
+// CHECK: asrs    r0, r1                  @ encoding: [0x08,0x41]
+// CHECK: asrs.w  r0, r1, r0              @ encoding: [0x51,0xfa,0x00,0xf0]
+// CHECK: asrs.w  r3, r3, r1              @ encoding: [0x53,0xfa,0x01,0xf3]
+// CHECK: asrs.w  r1, r1, r1              @ encoding: [0x51,0xfa,0x01,0xf1]
+// CHECK: asr.w   r0, r1, r0              @ encoding: [0x41,0xfa,0x00,0xf0]
+// CHECK: asrs    r7, r1                  @ encoding: [0x0f,0x41]
+// CHECK: asrs.w  r8, r1, r8              @ encoding: [0x51,0xfa,0x08,0xf8]
+// CHECK: asrs.w  r8, r8, r1              @ encoding: [0x58,0xfa,0x01,0xf8]
+// CHECK: asrs.w  r5, r8, r5              @ encoding: [0x58,0xfa,0x05,0xf5]
+// CHECK: asrs.w  r5, r5, r8              @ encoding: [0x55,0xfa,0x08,0xf5]
+
+    IT EQ
+    ASREQ    r0, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    ASREQ    r2, r2, r1          // Should choose narrow
+    IT EQ
+    ASREQ    r1, r2, r1          // Should choose wide - not commutative
+    IT EQ
+    ASREQ.W  r4, r4, r1          // Explicitly wide
+    IT EQ
+    ASREQ.W  r6, r1, r6  
+    IT EQ
+    ASRSEQ   r3, r1, r3          // Must use wide encoding as flag-setting
+    IT EQ
+    ASREQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    ASREQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    ASREQ    r8, r8, r1
+    IT EQ
+    ASREQ    r1, r8, r1
+    IT EQ
+    ASREQ    r3, r3, r8
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r0, r2, r1             @ encoding: [0x42,0xfa,0x01,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq    r2, r1                 @ encoding: [0x0a,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r1, r2, r1             @ encoding: [0x42,0xfa,0x01,0xf1]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r4, r4, r1             @ encoding: [0x44,0xfa,0x01,0xf4]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r6, r1, r6             @ encoding: [0x41,0xfa,0x06,0xf6]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asrseq.w r3, r1, r3             @ encoding: [0x51,0xfa,0x03,0xf3]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq    r7, r1                 @ encoding: [0x0f,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r8, r1, r8             @ encoding: [0x41,0xfa,0x08,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r8, r8, r1             @ encoding: [0x48,0xfa,0x01,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r1, r8, r1             @ encoding: [0x48,0xfa,0x01,0xf1]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: asreq.w  r3, r3, r8             @ encoding: [0x43,0xfa,0x08,0xf3]
+
+// ADC (commutative)
+    ADCS     r5, r2, r1          // Must be wide - 3 distinct registers
+    ADCS     r5, r5, r1          // Should choose narrow  
+    ADCS     r3, r1, r3          // Should choose narrow - commutative
+    ADCS.W   r2, r2, r1          // Explicitly wide
+    ADCS.W   r3, r1, r3  
+    ADC      r0, r1, r0          // Must use wide encoding as not flag-setting
+    ADCS     r7, r7, r1          // Should use narrow
+    ADCS     r7, r1, r7          // Commutative
+    ADCS     r8, r1, r8          // high registers so must use wide encoding
+    ADCS     r8, r8, r1
+    ADCS     r5, r8, r5
+    ADCS     r2, r2, r8
+    ADCS     r3, r3, r1, lsl #1  // Must use wide - shifted register
+    ADCS     r4, r1, r4, lsr #1
+// CHECK: adcs.w  r5, r2, r1              @ encoding: [0x52,0xeb,0x01,0x05]
+// CHECK: adcs    r5, r1                  @ encoding: [0x4d,0x41]
+// CHECK: adcs    r3, r1                  @ encoding: [0x4b,0x41]
+// CHECK: adcs.w  r2, r2, r1              @ encoding: [0x52,0xeb,0x01,0x02]
+// CHECK: adcs.w  r3, r1, r3              @ encoding: [0x51,0xeb,0x03,0x03]
+// CHECK: adc.w   r0, r1, r0              @ encoding: [0x41,0xeb,0x00,0x00]
+// CHECK: adcs    r7, r1                  @ encoding: [0x4f,0x41]
+// CHECK: adcs    r7, r1                  @ encoding: [0x4f,0x41]
+// CHECK: adcs.w  r8, r1, r8              @ encoding: [0x51,0xeb,0x08,0x08]
+// CHECK: adcs.w  r8, r8, r1              @ encoding: [0x58,0xeb,0x01,0x08]
+// CHECK: adcs.w  r5, r8, r5              @ encoding: [0x58,0xeb,0x05,0x05]
+// CHECK: adcs.w  r2, r2, r8              @ encoding: [0x52,0xeb,0x08,0x02]
+// CHECK: adcs.w  r3, r3, r1, lsl #1      @ encoding: [0x53,0xeb,0x41,0x03]
+// CHECK: adcs.w  r4, r1, r4, lsr #1      @ encoding: [0x51,0xeb,0x54,0x04]
+
+    IT EQ
+    ADCEQ    r1, r2, r3          // Must be wide - 3 distinct registers
+    IT EQ
+    ADCEQ    r1, r1, r1          // Should choose narrow
+    IT EQ
+    ADCEQ    r3, r1, r3          // Should choose narrow - commutative
+    IT EQ
+    ADCEQ.W  r3, r3, r1          // Explicitly wide
+    IT EQ
+    ADCEQ.W  r0, r1, r0  
+    IT EQ
+    ADCSEQ   r3, r1, r3          // Must use wide encoding as flag-setting
+    IT EQ
+    ADCEQ    r7, r7, r1          // Should use narrow 
+    IT EQ
+    ADCEQ    r7, r1, r7          // Commutative
+    IT EQ
+    ADCEQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    ADCEQ    r8, r8, r1
+    IT EQ
+    ADCEQ    r3, r8, r3
+    IT EQ
+    ADCEQ    r1, r1, r8
+    IT EQ
+    ADCEQ    r2, r2, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    ADCEQ    r1, r1, r1, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r1, r2, r3             @ encoding: [0x42,0xeb,0x03,0x01]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq    r1, r1                 @ encoding: [0x49,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq    r3, r1                 @ encoding: [0x4b,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r3, r3, r1             @ encoding: [0x43,0xeb,0x01,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r0, r1, r0             @ encoding: [0x41,0xeb,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adcseq.w r3, r1, r3             @ encoding: [0x51,0xeb,0x03,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq    r7, r1                 @ encoding: [0x4f,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq    r7, r1                 @ encoding: [0x4f,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r8, r1, r8             @ encoding: [0x41,0xeb,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r8, r8, r1             @ encoding: [0x48,0xeb,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r3, r8, r3             @ encoding: [0x48,0xeb,0x03,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r1, r1, r8             @ encoding: [0x41,0xeb,0x08,0x01]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r2, r2, r1, lsl #1     @ encoding: [0x42,0xeb,0x41,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: adceq.w  r1, r1, r1, lsr #1     @ encoding: [0x41,0xeb,0x51,0x01]
+
+// SBC 
+    SBCS     r3, r2, r1          // Must be wide - 3 distinct registers
+    SBCS     r4, r4, r1          // Should choose narrow  
+    SBCS     r1, r4, r1          // Should choose wide - not commutative  
+    SBCS.W   r4, r4, r1          // Explicitly wide
+    SBCS.W   r2, r1, r2  
+    SBC      r0, r1, r0          // Must use wide encoding as not flag-setting
+    SBCS     r7, r7, r1          // Should use narrow
+    SBCS     r8, r1, r8          // high registers so must use wide encoding
+    SBCS     r8, r8, r1
+    SBCS     r4, r8, r4
+    SBCS     r3, r3, r8
+    SBCS     r2, r2, r1, lsl #1  // Must use wide - shifted register
+    SBCS     r5, r1, r5, lsr #1
+// CHECK: sbcs.w  r3, r2, r1              @ encoding: [0x72,0xeb,0x01,0x03]
+// CHECK: sbcs    r4, r1                  @ encoding: [0x8c,0x41]
+// CHECK: sbcs.w  r1, r4, r1              @ encoding: [0x74,0xeb,0x01,0x01]
+// CHECK: sbcs.w  r4, r4, r1              @ encoding: [0x74,0xeb,0x01,0x04]
+// CHECK: sbcs.w  r2, r1, r2              @ encoding: [0x71,0xeb,0x02,0x02]
+// CHECK: sbc.w   r0, r1, r0              @ encoding: [0x61,0xeb,0x00,0x00]
+// CHECK: sbcs    r7, r1                  @ encoding: [0x8f,0x41]
+// CHECK: sbcs.w  r8, r1, r8              @ encoding: [0x71,0xeb,0x08,0x08]
+// CHECK: sbcs.w  r8, r8, r1              @ encoding: [0x78,0xeb,0x01,0x08]
+// CHECK: sbcs.w  r4, r8, r4              @ encoding: [0x78,0xeb,0x04,0x04]
+// CHECK: sbcs.w  r3, r3, r8              @ encoding: [0x73,0xeb,0x08,0x03]
+// CHECK: sbcs.w  r2, r2, r1, lsl #1      @ encoding: [0x72,0xeb,0x41,0x02]
+// CHECK: sbcs.w  r5, r1, r5, lsr #1      @ encoding: [0x71,0xeb,0x55,0x05]
+
+    IT EQ
+    SBCEQ    r5, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    SBCEQ    r5, r5, r1          // Should choose narrow
+    IT EQ
+    SBCEQ    r1, r5, r1          // Should choose narrow
+    IT EQ
+    SBCEQ.W  r5, r5, r1          // Explicitly wide
+    IT EQ
+    SBCEQ.W  r0, r1, r0  
+    IT EQ
+    SBCSEQ   r2, r1, r2          // Must use wide encoding as flag-setting
+    IT EQ
+    SBCEQ    r7, r7, r1          // Should use narrow 
+    IT EQ
+    SBCEQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    SBCEQ    r8, r8, r1
+    IT EQ
+    SBCEQ    r7, r8, r7
+    IT EQ
+    SBCEQ    r7, r7, r8
+    IT EQ
+    SBCEQ    r2, r2, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    SBCEQ    r5, r1, r5, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r5, r2, r1             @ encoding: [0x62,0xeb,0x01,0x05]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq    r5, r1                 @ encoding: [0x8d,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r1, r5, r1             @ encoding: [0x65,0xeb,0x01,0x01]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r5, r5, r1             @ encoding: [0x65,0xeb,0x01,0x05]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r0, r1, r0             @ encoding: [0x61,0xeb,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbcseq.w r2, r1, r2             @ encoding: [0x71,0xeb,0x02,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq    r7, r1                 @ encoding: [0x8f,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r8, r1, r8             @ encoding: [0x61,0xeb,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r8, r8, r1             @ encoding: [0x68,0xeb,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r7, r8, r7             @ encoding: [0x68,0xeb,0x07,0x07]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r7, r7, r8             @ encoding: [0x67,0xeb,0x08,0x07]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r2, r2, r1, lsl #1     @ encoding: [0x62,0xeb,0x41,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: sbceq.w  r5, r1, r5, lsr #1     @ encoding: [0x61,0xeb,0x55,0x05]
+
+// ROR 
+    RORS     r3, r2, r1          // Must be wide - 3 distinct registers
+    RORS     r0, r0, r1          // Should choose narrow
+    RORS     r1, r0, r1          // Should choose wide - not commutative
+    RORS.W   r2, r2, r1          // Explicitly wide
+    RORS.W   r2, r1, r2  
+    ROR      r5, r1, r5          // Must use wide encoding as not flag-setting
+    RORS     r7, r7, r1          // Should use narrow
+    RORS     r8, r1, r8          // high registers so must use wide encoding
+    RORS     r8, r8, r1
+    RORS     r6, r8, r6
+    RORS     r6, r6, r8
+// CHECK: rors.w  r3, r2, r1              @ encoding: [0x72,0xfa,0x01,0xf3]
+// CHECK: rors    r0, r1                  @ encoding: [0xc8,0x41]
+// CHECK: rors.w  r1, r0, r1              @ encoding: [0x70,0xfa,0x01,0xf1]
+// CHECK: rors.w  r2, r2, r1              @ encoding: [0x72,0xfa,0x01,0xf2]
+// CHECK: rors.w  r2, r1, r2              @ encoding: [0x71,0xfa,0x02,0xf2]
+// CHECK: ror.w   r5, r1, r5              @ encoding: [0x61,0xfa,0x05,0xf5]
+// CHECK: rors    r7, r1                  @ encoding: [0xcf,0x41]
+// CHECK: rors.w  r8, r1, r8              @ encoding: [0x71,0xfa,0x08,0xf8]
+// CHECK: rors.w  r8, r8, r1              @ encoding: [0x78,0xfa,0x01,0xf8]
+// CHECK: rors.w  r6, r8, r6              @ encoding: [0x78,0xfa,0x06,0xf6]
+// CHECK: rors.w  r6, r6, r8              @ encoding: [0x76,0xfa,0x08,0xf6]
+
+    IT EQ
+    ROREQ    r4, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    ROREQ    r4, r4, r1          // Should choose narrow
+    IT EQ
+    ROREQ    r1, r4, r1          // Should choose wide - not commutative
+    IT EQ
+    ROREQ.W  r4, r4, r1          // Explicitly wide
+    IT EQ
+    ROREQ.W  r0, r1, r0  
+    IT EQ
+    RORSEQ   r0, r1, r0          // Must use wide encoding as flag-setting
+    IT EQ
+    ROREQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    ROREQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    ROREQ    r8, r8, r1
+    IT EQ
+    ROREQ    r3, r8, r3
+    IT EQ
+    ROREQ    r1, r1, r8
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r4, r2, r1             @ encoding: [0x62,0xfa,0x01,0xf4]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq    r4, r1                 @ encoding: [0xcc,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r1, r4, r1             @ encoding: [0x64,0xfa,0x01,0xf1]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r4, r4, r1             @ encoding: [0x64,0xfa,0x01,0xf4]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r0, r1, r0             @ encoding: [0x61,0xfa,0x00,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: rorseq.w r0, r1, r0             @ encoding: [0x71,0xfa,0x00,0xf0]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq    r7, r1                 @ encoding: [0xcf,0x41]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r8, r1, r8             @ encoding: [0x61,0xfa,0x08,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r8, r8, r1             @ encoding: [0x68,0xfa,0x01,0xf8]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r3, r8, r3             @ encoding: [0x68,0xfa,0x03,0xf3]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: roreq.w  r1, r1, r8             @ encoding: [0x61,0xfa,0x08,0xf1]
+
+// TST - only two register version available
+// RSB - only two register version available
+// CMP - only two register version available
+// CMN - only two register version available
+
+// ORR (commutative)
+    ORRS     r7, r2, r1          // Must be wide - 3 distinct registers
+    ORRS     r2, r2, r1          // Should choose narrow
+    ORRS     r3, r1, r3          // Should choose narrow - commutative
+    ORRS.W   r4, r4, r1          // Explicitly wide
+    ORRS.W   r5, r1, r5  
+    ORR      r2, r1, r2          // Must use wide encoding as not flag-setting
+    ORRS     r7, r7, r1          // Should use narrow
+    ORRS     r7, r1, r7          // Commutative
+    ORRS     r8, r1, r8          // high registers so must use wide encoding
+    ORRS     r8, r8, r1
+    ORRS     r1, r8, r1
+    ORRS     r0, r0, r8
+    ORRS     r1, r1, r1, lsl #1  // Must use wide - shifted register
+    ORRS     r0, r1, r0, lsr #1
+// CHECK: orrs.w  r7, r2, r1              @ encoding: [0x52,0xea,0x01,0x07]
+// CHECK: orrs    r2, r1                  @ encoding: [0x0a,0x43]
+// CHECK: orrs    r3, r1                  @ encoding: [0x0b,0x43]
+// CHECK: orrs.w  r4, r4, r1              @ encoding: [0x54,0xea,0x01,0x04]
+// CHECK: orrs.w  r5, r1, r5              @ encoding: [0x51,0xea,0x05,0x05]
+// CHECK: orr.w   r2, r1, r2              @ encoding: [0x41,0xea,0x02,0x02]
+// CHECK: orrs    r7, r1                  @ encoding: [0x0f,0x43]
+// CHECK: orrs    r7, r1                  @ encoding: [0x0f,0x43]
+// CHECK: orrs.w  r8, r1, r8              @ encoding: [0x51,0xea,0x08,0x08]
+// CHECK: orrs.w  r8, r8, r1              @ encoding: [0x58,0xea,0x01,0x08]
+// CHECK: orrs.w  r1, r8, r1              @ encoding: [0x58,0xea,0x01,0x01]
+// CHECK: orrs.w  r0, r0, r8              @ encoding: [0x50,0xea,0x08,0x00]
+// CHECK: orrs.w  r1, r1, r1, lsl #1      @ encoding: [0x51,0xea,0x41,0x01]
+// CHECK: orrs.w  r0, r1, r0, lsr #1      @ encoding: [0x51,0xea,0x50,0x00]
+
+    IT EQ
+    ORREQ    r0, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    ORREQ    r5, r5, r1          // Should choose narrow
+    IT EQ
+    ORREQ    r5, r1, r5          // Should choose narrow - commutative
+    IT EQ
+    ORREQ.W  r2, r2, r1          // Explicitly wide
+    IT EQ
+    ORREQ.W  r3, r1, r3  
+    IT EQ
+    ORRSEQ   r4, r1, r4          // Must use wide encoding as flag-setting
+    IT EQ
+    ORREQ    r7, r7, r1          // Should use narrow
+    IT EQ
+    ORREQ    r7, r1, r7          // Commutative
+    IT EQ
+    ORREQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    ORREQ    r8, r8, r1
+    IT EQ
+    ORREQ    r0, r8, r0
+    IT EQ
+    ORREQ    r0, r0, r8
+    IT EQ
+    ORREQ    r2, r2, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    ORREQ    r2, r1, r2, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r0, r2, r1             @ encoding: [0x42,0xea,0x01,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq    r5, r1                 @ encoding: [0x0d,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq    r5, r1                 @ encoding: [0x0d,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r2, r2, r1             @ encoding: [0x42,0xea,0x01,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r3, r1, r3             @ encoding: [0x41,0xea,0x03,0x03]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orrseq.w r4, r1, r4             @ encoding: [0x51,0xea,0x04,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq    r7, r1                 @ encoding: [0x0f,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq    r7, r1                 @ encoding: [0x0f,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r8, r1, r8             @ encoding: [0x41,0xea,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r8, r8, r1             @ encoding: [0x48,0xea,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r0, r8, r0             @ encoding: [0x48,0xea,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r0, r0, r8             @ encoding: [0x40,0xea,0x08,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r2, r2, r1, lsl #1     @ encoding: [0x42,0xea,0x41,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: orreq.w  r2, r1, r2, lsr #1     @ encoding: [0x41,0xea,0x52,0x02]
+
+// MUL - not affected by this change
+
+// BIC 
+    BICS     r3, r2, r1          // Must be wide - 3 distinct registers
+    BICS     r2, r2, r1          // Should choose narrow  
+    BICS     r1, r2, r1          // Should choose wide - not commutative  
+    BICS.W   r2, r2, r1          // Explicitly wide
+    BICS.W   r0, r1, r0  
+    BIC      r0, r1, r0          // Must use wide encoding as not flag-setting
+    BICS     r7, r7, r1          // Should use narrow
+    BICS     r8, r1, r8          // high registers so must use wide encoding
+    BICS     r8, r8, r1
+    BICS     r7, r8, r7
+    BICS     r5, r5, r8
+    BICS     r3, r3, r1, lsl #1  // Must use wide - shifted register
+    BICS     r4, r1, r4, lsr #1
+// CHECK: bics.w  r3, r2, r1              @ encoding: [0x32,0xea,0x01,0x03]
+// CHECK: bics    r2, r1                  @ encoding: [0x8a,0x43]
+// CHECK: bics.w  r1, r2, r1              @ encoding: [0x32,0xea,0x01,0x01]
+// CHECK: bics.w  r2, r2, r1              @ encoding: [0x32,0xea,0x01,0x02]
+// CHECK: bics.w  r0, r1, r0              @ encoding: [0x31,0xea,0x00,0x00]
+// CHECK: bic.w   r0, r1, r0              @ encoding: [0x21,0xea,0x00,0x00]
+// CHECK: bics    r7, r1                  @ encoding: [0x8f,0x43]
+// CHECK: bics.w  r8, r1, r8              @ encoding: [0x31,0xea,0x08,0x08]
+// CHECK: bics.w  r8, r8, r1              @ encoding: [0x38,0xea,0x01,0x08]
+// CHECK: bics.w  r7, r8, r7              @ encoding: [0x38,0xea,0x07,0x07]
+// CHECK: bics.w  r5, r5, r8              @ encoding: [0x35,0xea,0x08,0x05]
+// CHECK: bics.w  r3, r3, r1, lsl #1      @ encoding: [0x33,0xea,0x41,0x03]
+// CHECK: bics.w  r4, r1, r4, lsr #1      @ encoding: [0x31,0xea,0x54,0x04]
+
+    IT EQ
+    BICEQ    r0, r2, r1          // Must be wide - 3 distinct registers
+    IT EQ
+    BICEQ    r5, r5, r1          // Should choose narrow
+    IT EQ
+    BICEQ    r1, r5, r1          // Should choose wide - not commutative
+    IT EQ
+    BICEQ.W  r4, r4, r1          // Explicitly wide
+    IT EQ
+    BICEQ.W  r2, r1, r2  
+    IT EQ
+    BICSEQ   r5, r1, r5          // Must use wide encoding as flag-setting
+    IT EQ
+    BICEQ    r7, r7, r1          // Should use narrow 
+    IT EQ
+    BICEQ    r8, r1, r8          // high registers so must use wide encoding
+    IT EQ
+    BICEQ    r8, r8, r1
+    IT EQ
+    BICEQ    r0, r8, r0
+    IT EQ
+    BICEQ    r2, r2, r8
+    IT EQ
+    BICEQ    r4, r4, r1, lsl #1  // Must use wide - shifted register
+    IT EQ
+    BICEQ    r5, r1, r5, lsr #1
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r0, r2, r1             @ encoding: [0x22,0xea,0x01,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq    r5, r1                 @ encoding: [0x8d,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r1, r5, r1             @ encoding: [0x25,0xea,0x01,0x01]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r4, r4, r1             @ encoding: [0x24,0xea,0x01,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r2, r1, r2             @ encoding: [0x21,0xea,0x02,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: bicseq.w r5, r1, r5             @ encoding: [0x31,0xea,0x05,0x05]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq    r7, r1                 @ encoding: [0x8f,0x43]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r8, r1, r8             @ encoding: [0x21,0xea,0x08,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r8, r8, r1             @ encoding: [0x28,0xea,0x01,0x08]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r0, r8, r0             @ encoding: [0x28,0xea,0x00,0x00]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r2, r2, r8             @ encoding: [0x22,0xea,0x08,0x02]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r4, r4, r1, lsl #1     @ encoding: [0x24,0xea,0x41,0x04]
+// CHECK: it eq                           @ encoding: [0x08,0xbf]
+// CHECK: biceq.w  r5, r1, r5, lsr #1     @ encoding: [0x21,0xea,0x55,0x05]
+
+// CMN - only two register version available
diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s
index cc87a38..0a1fe92 100644
--- a/test/MC/ARM/vfp4.s
+++ b/test/MC/ARM/vfp4.s
@@ -1,5 +1,6 @@
 @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4   | FileCheck %s --check-prefix=ARM
 @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
+@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM
 
 @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
 @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
@@ -7,6 +8,7 @@ vfma.f64 d16, d18, d17
 
 @ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee]
 @ THUMB: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a]
+@ THUMB_V7EM: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a]
 vfma.f32 s2, s4, s0
 
 @ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
@@ -23,6 +25,7 @@ vfnma.f64 d16, d18, d17
 
 @ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee]
 @ THUMB: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a]
+@ THUMB_V7EM: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a]
 vfnma.f32 s2, s4, s0
 
 @ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee]
@@ -31,6 +34,7 @@ vfms.f64 d16, d18, d17
 
 @ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee]
 @ THUMB: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a]
+@ THUMB_V7EM: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a]
 vfms.f32 s2, s4, s0
 
 @ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s
index 4fb4dec..31f5524 100644
--- a/test/MC/ARM/vpush-vpop.s
+++ b/test/MC/ARM/vpush-vpop.s
@@ -1,5 +1,5 @@
-@ RUN: llvm-mc -triple armv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-ARM %s
-@ RUN: llvm-mc -triple thumbv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-THUMB %s
+@ RUN: llvm-mc -triple armv7-unknown-unknown -mcpu=cortex-a8 -show-encoding < %s | FileCheck --check-prefix=CHECK-ARM %s
+@ RUN: llvm-mc -triple thumbv7-unknown-unknown -mcpu=cortex-a8 -show-encoding < %s | FileCheck --check-prefix=CHECK-THUMB %s
 
 foo:
 @ CHECK: foo
diff --git a/test/MC/AsmParser/extern.s b/test/MC/AsmParser/extern.s
new file mode 100644
index 0000000..461f843
--- /dev/null
+++ b/test/MC/AsmParser/extern.s
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK-NOT: foo
+.extern foo
diff --git a/test/MC/AsmParser/ifb.s b/test/MC/AsmParser/ifb.s
new file mode 100644
index 0000000..48d69f4
--- /dev/null
+++ b/test/MC/AsmParser/ifb.s
@@ -0,0 +1,67 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+defined:
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifb
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifb defined
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifb undefined
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifb ""
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnb
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnb defined
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnb undefined
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnb ""
+	.byte 1
+.else
+	.byte 0
+.endif
diff --git a/test/MC/AsmParser/ifc.s b/test/MC/AsmParser/ifc.s
new file mode 100644
index 0000000..20e55c0
--- /dev/null
+++ b/test/MC/AsmParser/ifc.s
@@ -0,0 +1,65 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifc foo, foo
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifc "foo space", "foo space"
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifc foo space, foo space
+	.byte 1
+.else
+	.byte 0
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifc unequal, unEqual
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnc foo, foo
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnc "foo space", "foo space"
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnc foo space, foo space
+	.byte 0
+.else
+	.byte 1
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifnc unequal, unEqual
+	.byte 1
+.else
+	.byte 0
+.endif
diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s
index 13b197a..6d08421 100644
--- a/test/MC/AsmParser/macro-args.s
+++ b/test/MC/AsmParser/macro-args.s
@@ -42,3 +42,15 @@ top bar, 42
 // CHECK-NOT: fred
 // CHECK: _bar
 // CHECK-NEXT: fred = 42
+
+
+.macro foo
+foo_$0_$1_$2_$3:
+  nop
+.endm
+
+foo 1, 2, 3, 4
+foo 1, , 3, 4
+
+// CHECK: foo_1_2_3_4:
+// CHECK: foo_1__3_4:
diff --git a/test/MC/AsmParser/macro-err1.s b/test/MC/AsmParser/macro-err1.s
new file mode 100644
index 0000000..924deb0
--- /dev/null
+++ b/test/MC/AsmParser/macro-err1.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t
+// RUN: FileCheck < %t %s
+
+.macro foo bar
+        .long \bar
+.endm
+
+foo 42,  42
+
+// CHECK: Too many arguments
diff --git a/test/MC/AsmParser/macro-irp.s b/test/MC/AsmParser/macro-irp.s
new file mode 100644
index 0000000..a368b74
--- /dev/null
+++ b/test/MC/AsmParser/macro-irp.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+
+.irp reg,%eax,%ebx
+        pushl \reg
+.endr
+
+// CHECK: pushl %eax
+// CHECK: pushl %ebx
diff --git a/test/MC/AsmParser/macro-irpc.s b/test/MC/AsmParser/macro-irpc.s
new file mode 100644
index 0000000..ea5efbf
--- /dev/null
+++ b/test/MC/AsmParser/macro-irpc.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+
+.irpc foo,123
+        .long \foo
+.endr
+
+// CHECK: long 1
+// CHECK: long 2
+// CHECK: long 3
diff --git a/test/MC/AsmParser/macro-rept-err1.s b/test/MC/AsmParser/macro-rept-err1.s
new file mode 100644
index 0000000..db92856
--- /dev/null
+++ b/test/MC/AsmParser/macro-rept-err1.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t
+// RUN: FileCheck < %t %s
+
+.endr
+
+// CHECK: unexpected '.endr' directive, no current .rept
diff --git a/test/MC/AsmParser/macro-rept-err2.s b/test/MC/AsmParser/macro-rept-err2.s
new file mode 100644
index 0000000..678b4c7
--- /dev/null
+++ b/test/MC/AsmParser/macro-rept-err2.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t
+// RUN: FileCheck < %t %s
+
+.rept 3
+.long
+
+// CHECK: no matching '.endr' in definition
diff --git a/test/MC/AsmParser/macro-rept.s b/test/MC/AsmParser/macro-rept.s
new file mode 100644
index 0000000..1dc8060
--- /dev/null
+++ b/test/MC/AsmParser/macro-rept.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+
+.rept 2
+    .long 1
+.endr
+
+.rept 3
+.rept 2
+    .long 0
+.endr
+.endr
+
+// CHECK: .long	1
+// CHECK: .long	1
+
+// CHECK: .long	0
+// CHECK: .long	0
+// CHECK: .long	0
+
+// CHECK: .long	0
+// CHECK: .long	0
+// CHECK: .long	0
diff --git a/test/MC/AsmParser/macros-parsing.s b/test/MC/AsmParser/macros-parsing.s
index 65f6454..75aaac03 100644
--- a/test/MC/AsmParser/macros-parsing.s
+++ b/test/MC/AsmParser/macros-parsing.s
@@ -5,7 +5,7 @@
 .endmacro
 
 .macros_off
-// CHECK-ERRORS: 9:1: warning: ignoring directive for now
+// CHECK-ERRORS: 9:1: error: unknown directive
 .test0
 .macros_on
 
diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s
index 214274d..2957592 100644
--- a/test/MC/AsmParser/macros.s
+++ b/test/MC/AsmParser/macros.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
 // RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
 
 .macro .test0
@@ -9,7 +9,7 @@
 .endmacro
 
 .test1
-// CHECK-ERRORS: <instantiation>:1:1: warning: ignoring directive for now
+// CHECK-ERRORS: <instantiation>:1:1: error: unknown directive
 // CHECK-ERRORS-NEXT: macrobody0
 // CHECK-ERRORS-NEXT: ^
 // CHECK-ERRORS: <instantiation>:1:1: note: while in macro instantiation
diff --git a/test/MC/AsmParser/purgem.s b/test/MC/AsmParser/purgem.s
new file mode 100644
index 0000000..c76c1c6
--- /dev/null
+++ b/test/MC/AsmParser/purgem.s
@@ -0,0 +1,12 @@
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s
+
+.macro foo
+.err
+.endm
+
+.purgem bar
+# CHECK: error: macro 'bar' is not defined
+
+.purgem foo
+foo
+# CHECK: error: invalid instruction mnemonic 'foo'
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
index 8cafcb3..3f72805 100644
--- a/test/MC/COFF/seh.s
+++ b/test/MC/COFF/seh.s
@@ -14,7 +14,6 @@
 // CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
 // CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
 // CHECK-NEXT:   IMAGE_SCN_MEM_READ
-// CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
 // CHECK-NEXT: SectionData
 // CHECK-NEXT:   09 12 08 03 00 03 0F 30 - 0E 88 00 00 09 64 02 00
 // CHECK-NEXT:   04 22 00 1A 00 00 00 00 - 00 00 00 00 21 00 00 00
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 471076a..0c9aaab 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 -mattr +mp | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 -mcpu=cortex-a9-mp | FileCheck %s
 
 # CHECK:	addpl	r4, pc, #318767104
 0x4c 0x45 0x8f 0x52
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
index fc7eda5..1100ce6 100644
--- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=armv7-apple-darwin -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble < %s | FileCheck %s
 
 #------------------------------------------------------------------------------
 # ADC (immediate)
@@ -169,9 +169,15 @@
 #------------------------------------------------------------------------------
 # CHECK: add	r2, pc, #3
 # CHECK: sub	r2, pc, #3
+# CHECK: sub	r1, pc, #0
+# CHECK: sub	r1, pc, #301989888
+# CHECK: add	r1, pc, #301989888
 
 0x03 0x20 0x8f 0xe2
 0x03 0x20 0x4f 0xe2
+0x00 0x10 0x4f 0xe2
+0x12 0x14 0x4f 0xe2
+0x12 0x14 0x8f 0xe2
 
 #------------------------------------------------------------------------------
 # AND
@@ -469,47 +475,77 @@
 #------------------------------------------------------------------------------
 # DMB
 #------------------------------------------------------------------------------
-# CHECK: dmb sy
-# CHECK: dmb st
-# CHECK: dmb ish
-# CHECK: dmb ishst
-# CHECK: dmb nsh
-# CHECK: dmb nshst
-# CHECK: dmb osh
+
+# CHECK: dmb #0x0
+# CHECK: dmb #0x1
 # CHECK: dmb oshst
-# CHECK: dmb
+# CHECK: dmb osh
+# CHECK: dmb #0x4
+# CHECK: dmb #0x5
+# CHECK: dmb nshst
+# CHECK: dmb nsh
+# CHECK: dmb #0x8
+# CHECK: dmb #0x9
+# CHECK: dmb ishst
+# CHECK: dmb ish
+# CHECK: dmb #0xc
+# CHECK: dmb #0xd
+# CHECK: dmb st
+# CHECK: dmb sy
 
-0x5f 0xf0 0x7f 0xf5
-0x5e 0xf0 0x7f 0xf5
-0x5b 0xf0 0x7f 0xf5
-0x5a 0xf0 0x7f 0xf5
-0x57 0xf0 0x7f 0xf5
-0x56 0xf0 0x7f 0xf5
-0x53 0xf0 0x7f 0xf5
+0x50 0xf0 0x7f 0xf5
+0x51 0xf0 0x7f 0xf5
 0x52 0xf0 0x7f 0xf5
+0x53 0xf0 0x7f 0xf5
+0x54 0xf0 0x7f 0xf5
+0x55 0xf0 0x7f 0xf5
+0x56 0xf0 0x7f 0xf5
+0x57 0xf0 0x7f 0xf5
+0x58 0xf0 0x7f 0xf5
+0x59 0xf0 0x7f 0xf5
+0x5a 0xf0 0x7f 0xf5
+0x5b 0xf0 0x7f 0xf5
+0x5c 0xf0 0x7f 0xf5
+0x5d 0xf0 0x7f 0xf5
+0x5e 0xf0 0x7f 0xf5
 0x5f 0xf0 0x7f 0xf5
 
 #------------------------------------------------------------------------------
 # DSB
 #------------------------------------------------------------------------------
-# CHECK: dsb sy
-# CHECK: dsb st
-# CHECK: dsb ish
-# CHECK: dsb ishst
-# CHECK: dsb nsh
-# CHECK: dsb nshst
-# CHECK: dsb osh
-# CHECK: dsb oshst
-# CHECK: dsb
 
-0x4f 0xf0 0x7f 0xf5
-0x4e 0xf0 0x7f 0xf5
-0x4b 0xf0 0x7f 0xf5
-0x4a 0xf0 0x7f 0xf5
-0x47 0xf0 0x7f 0xf5
-0x46 0xf0 0x7f 0xf5
-0x43 0xf0 0x7f 0xf5
+# CHECK: dsb	#0x0
+# CHECK: dsb	#0x1
+# CHECK: dsb	oshst
+# CHECK: dsb	osh
+# CHECK: dsb	#0x4
+# CHECK: dsb	#0x5
+# CHECK: dsb	nshst
+# CHECK: dsb	nsh
+# CHECK: dsb	#0x8
+# CHECK: dsb	#0x9
+# CHECK: dsb	ishst
+# CHECK: dsb	ish
+# CHECK: dsb	#0xc
+# CHECK: dsb	#0xd
+# CHECK: dsb	st
+# CHECK: dsb	sy
+
+0x40 0xf0 0x7f 0xf5
+0x41 0xf0 0x7f 0xf5
 0x42 0xf0 0x7f 0xf5
+0x43 0xf0 0x7f 0xf5
+0x44 0xf0 0x7f 0xf5
+0x45 0xf0 0x7f 0xf5
+0x46 0xf0 0x7f 0xf5
+0x47 0xf0 0x7f 0xf5
+0x48 0xf0 0x7f 0xf5
+0x49 0xf0 0x7f 0xf5
+0x4a 0xf0 0x7f 0xf5
+0x4b 0xf0 0x7f 0xf5
+0x4c 0xf0 0x7f 0xf5
+0x4d 0xf0 0x7f 0xf5
+0x4e 0xf0 0x7f 0xf5
 0x4f 0xf0 0x7f 0xf5
 
 #------------------------------------------------------------------------------
diff --git a/test/MC/Disassembler/ARM/fp-encoding.txt b/test/MC/Disassembler/ARM/fp-encoding.txt
index 9095b84..8dedf80 100644
--- a/test/MC/Disassembler/ARM/fp-encoding.txt
+++ b/test/MC/Disassembler/ARM/fp-encoding.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple armv7-apple-darwin -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple armv7-apple-darwin -mcpu=cortex-a8 -disassemble < %s | FileCheck %s
 
 0xa0 0x0b 0x71 0xee
 # CHECK: vadd.f64        d16, d17, d16
@@ -203,6 +203,33 @@
 # CHECK: vstmia  r1, {d2, d3, d4, d5, d6, d7}
 # CHECK: vstmia  r1, {s2, s3, s4, s5, s6, s7}
 
+0x05 0x9a 0xc0 0x0c
+0x0c 0x0b 0xc7 0x0c
+0x06 0x9a 0x93 0x0c
+0x0a 0x5b 0xd2 0x0c
+# CHECK: vstmiaeq r0, {s19, s20, s21, s22, s23}
+# CHECK: vstmiaeq r7, {d16, d17, d18, d19, d20, d21}
+# CHECK: vldmiaeq r3, {s18, s19, s20, s21, s22, s23}
+# CHECK: vldmiaeq r2, {d21, d22, d23, d24, d25}
+
+0x04 0xca 0x6c 0x0d
+0x06 0x1b 0x69 0x0d
+0x03 0xaa 0x75 0x0d
+0x08 0xeb 0x37 0x0d
+# CHECK: vstmdbeq r12!, {s25, s26, s27, s28}
+# CHECK: vstmdbeq r9!, {d17, d18, d19}
+# CHECK: vldmdbeq r5!, {s21, s22, s23}
+# CHECK: vldmdbeq r7!, {d14, d15, d16, d17}
+
+0x04 0x7a 0xa6 0x0c
+0x0c 0xfb 0xa4 0x0c
+0x03 0xaa 0xf8 0x0c
+0x0a 0x3b 0xfb 0x0c
+# CHECK: vstmiaeq r6!, {s14, s15, s16, s17}
+# CHECK: vstmiaeq r4!, {d15, d16, d17, d18, d19, d20}
+# CHECK: vldmiaeq r8!, {s21, s22, s23}
+# CHECK: vldmiaeq r11!, {d19, d20, d21, d22, d23}
+
 0x40 0x0b 0xbd 0xee
 0x60 0x0a 0xbd 0xee
 0x40 0x0b 0xbc 0xee
diff --git a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
index a0d5944..f7acce9 100644
--- a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=60 Name=BFI Format=ARM_FORMAT_DPFRM(4)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
index d2d424c..356c376 100644
--- a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
index 10748e9..bc8b7e1 100644
--- a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # invalid imod value (0b01)
 0xc0 0x67 0x4 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
index 8146b5c..842a52b 100644
--- a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
 
 # invalid (imod, M, iflags) combination
 0x93 0x00 0x02 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
index b441485..8396156 100644
--- a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1908 Name=t2DMB Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
index de042a97..2c6e6a7 100644
--- a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=102 Name=DSB Format=ARM_FORMAT_MISCFRM(26)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt
index 6174e92..4297c01 100644
--- a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
 
 # CBZ / CBNZ not allowed in IT block.
 
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
index 17e25ea..733895d 100644
--- a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
+++ b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep und
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown 2>&1 | grep und
 # rdar://10841671
 
 0xe3 0xbf
diff --git a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-thumb.txt
index 9b571b3..1a8ff48 100644
--- a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-IT-thumb.txt
@@ -1,3 +1,3 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown 2>&1 | grep "potentially undefined instruction encoding"
 
 0xff 0xbf 0x6b 0x80 0x00 0x75
diff --git a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
index 0b0426b..6cff09e 100644
--- a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=0 Name=PHI Format=(42)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt
index a42b248..7d8c492 100644
--- a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
 
 # Writeback is not allowed is Rn is in the target register list.
 
diff --git a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
index 6b695b9..68d22de 100644
--- a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
 
 # Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
index 7ea1b46..4df5309 100644
--- a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1930 Name=t2LDRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
index eef2c45..0cff28a 100644
--- a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # LDR_PRE/POST has encoding Inst{4} = 0.
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
index e42e0de..30cb727 100644
--- a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
 
 # Opcode=165 Name=LDR_PRE Format=ARM_FORMAT_LDFRM(6)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
index 23a0b85..7b7286a 100644
--- a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # LDR (register) has encoding Inst{4} = 0.
 0xba 0xae 0x9f 0x57
diff --git a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
index 8343d54..bb4b06c 100644
--- a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=171 Name=MCR Format=ARM_FORMAT_BRFRM(2)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
index 235952f..528563a 100644
--- a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=185 Name=MOVTi16 Format=ARM_FORMAT_DPFRM(4)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
index 01c1466..41ec53f 100644
--- a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=0 Name=PHI Format=(42)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
index 757d167..e5f2a5e 100644
--- a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
index ba48877..3f4c1e5 100644
--- a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=0 Name=PHI Format=(42)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt b/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt
index aaae6ce..c20ce54 100644
--- a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: invalid instruction encoding
 0x00 0x1a 0x50 0xfc
diff --git a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
index 3765b1f..901667a 100644
--- a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=206 Name=MSRi Format=ARM_FORMAT_BRFRM(2)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
index cffd86d..499aa86 100644
--- a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=134 Name=LDMIA Format=ARM_FORMAT_LDSTMULFRM(10)
 # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
index 9e16536..7bc97d5 100644
--- a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=271 Name=SBFX Format=ARM_FORMAT_DPFRM(4)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
index 91f3d58..fe4f43a 100644
--- a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=284 Name=SMLAD Format=ARM_FORMAT_MULFRM(1)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
index fc5c711..eedd05c 100644
--- a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=0 Name=PHI Format=(42)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
index ca16724..3d5235d 100644
--- a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=2313 Name=tSTMIA_UPD Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
index 400d44c..f67f38e 100644
--- a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=390 Name=SXTBr_rot Format=ARM_FORMAT_EXTFRM(14)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
index c7cbd84..f57c48f 100644
--- a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=419 Name=UMAAL Format=ARM_FORMAT_MULFRM(1)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
index 12da869..5ba7d61 100644
--- a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30)
diff --git a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
index bab32ca..58def05 100644
--- a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=871 Name=VLD3DUPd32_UPD Format=ARM_FORMAT_NLdSt(30)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
index 887b983..54fcadb 100644
--- a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # core registers out of range
 0xa5 0xba 0x72 0xed
diff --git a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
index a53f940..f961c64 100644
--- a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=1225 Name=VQADDsv16i8 Format=ARM_FORMAT_N3Reg(37)
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
index 8ff3a2b..2d2a628 100644
--- a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
index a12ca95..07a1c7a 100644
--- a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=1641 Name=VST2b32_UPD Format=ARM_FORMAT_NLdSt(30)
diff --git a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
index df0a642..c9f1cf1 100644
--- a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1894 Name=t2Bcc Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
index e1f841b8..eb415f7 100644
--- a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1922 Name=t2LDRBT Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
index 7c0efab..6c13560 100644
--- a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=1934 Name=t2LDREXD Format=ARM_FORMAT_THUMBFRM(25)
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
index a63d121..7f84e08 100644
--- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1953 Name=t2LDRSHi12 Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
index f126ff0..e44cf95 100644
--- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=1954 Name=t2LDRSHi8 Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt
index b3daa9a..8c0d48b 100644
--- a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # SP and PC are not allowed in the register list on STM instructions in Thumb2.
 
diff --git a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
index 2198efc..64ba368 100644
--- a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=2124 Name=t2STRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
index 3f406d4..243c11d 100644
--- a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 # XFAIL: *
 
 # Opcode=2127 Name=t2STREXB Format=ARM_FORMAT_THUMBFRM(25)
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
index 0f9a16e..7a7c4a5 100644
--- a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=2128 Name=t2STREXD Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
index 548ad05..2ad3e7d 100644
--- a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # Opcode=2137 Name=t2STR_POST Format=ARM_FORMAT_THUMBFRM(25)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/ldrd-armv4.txt b/test/MC/Disassembler/ARM/ldrd-armv4.txt
index bb87ade..f2fff3f 100644
--- a/test/MC/Disassembler/ARM/ldrd-armv4.txt
+++ b/test/MC/Disassembler/ARM/ldrd-armv4.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi |& FileCheck %s -check-prefix=V4
-# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi |& FileCheck %s -check-prefix=V5TE
+# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=V4
+# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=V5TE
 
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
 # -------------------------------------------------------------------------------------------------
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index f44c2a0..a7b6b1c 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
 
 # CHECK:	vbif	q15, q7, q0
 0x50 0xe1 0x7e 0xf3
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index c5dbee3..649424a 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple armv7-unknown-unknown -disassemble -mattr +fp16 < %s | FileCheck %s
+# RUN: llvm-mc -triple armv7-unknown-unknown -mcpu=cortex-a9 -disassemble < %s | FileCheck %s
 
 0x20 0x03 0xf1 0xf3
 # CHECK: vabs.s8	d16, d16
@@ -1734,6 +1734,25 @@
 0xcf 0x1a 0xe0 0xf4
 # CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0]
 
+# CHECK: vld3.8	{d0[], d1[], d2[]}, [r4]
+0x0f 0x0e 0xa4 0xf4
+# CHECK: vld3.8	{d0[], d1[], d2[]}, [r4]!
+0x0d 0x0e 0xa4 0xf4
+# CHECK: vld3.8	{d0[], d2[], d4[]}, [r4], r5
+0x25 0x0e 0xa4 0xf4
+# CHECK: vld3.16	{d0[], d2[], d4[]}, [r4]
+0x6f 0x0e 0xa4 0xf4
+# CHECK: vld3.16	{d0[], d1[], d2[]}, [r4]!
+0x4d 0x0e 0xa4 0xf4
+# CHECK: vld3.16	{d0[], d2[], d4[]}, [r4], r5
+0x65 0x0e 0xa4 0xf4
+# CHECK: vld3.32	{d0[], d1[], d2[]}, [r4]
+0x8f 0x0e 0xa4 0xf4
+# CHECK: vld3.32	{d0[], d1[], d2[]}, [r4]!
+0x8d 0x0e 0xa4 0xf4
+# CHECK: vld3.32	{d0[], d2[], d4[]}, [r4], r5
+0xa5 0x0e 0xa4 0xf4
+
 0x3f 0x03 0xe0 0xf4
 # CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
 0x4f 0x07 0xe0 0xf4
@@ -1745,6 +1764,30 @@
 0x4f 0x1b 0xe0 0xf4
 # CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
+0x0f 0x0f 0xa4 0xf4
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4]
+0x3f 0x0f 0xa4 0xf4
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32]
+0x1d 0x0f 0xa4 0xf4
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4, :32]!
+0x35 0x0f 0xa4 0xf4
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32], r5
+0x4f 0x0f 0xa4 0xf4
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4]
+0x7f 0x0f 0xa4 0xf4
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64]
+0x5d 0x0f 0xa4 0xf4
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4, :64]!
+0x75 0x0f 0xa4 0xf4
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64], r5
+0x8f 0x0f 0xa4 0xf4
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4]
+0xbf 0x0f 0xa4 0xf4
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :64]
+0xdd 0x0f 0xa4 0xf4
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4, :128]!
+0xf5 0x0f 0xa4 0xf4
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :128], r5
 
 
 0x1f 0x07 0x40 0xf4
@@ -1852,7 +1895,26 @@
 # CHECK: vst4.8	{d0[0], d1[0], d2[0], d3[0]}, [r0]!
 
 0x3d 0x2a 0x5e 0x6c
-# CHECK: vmovvs	r2, lr, s29, s30
+# CHECK: vmovvs	r2, lr, s27, s28
+
+0x31 0x1a 0x42 0xec
+0x11 0x1a 0x42 0xec
+0x31 0x1a 0x52 0xec
+0x11 0x1a 0x52 0xec
+# CHECK: vmov s3, s4, r1, r2
+# CHECK: vmov s2, s3, r1, r2
+# CHECK: vmov r1, r2, s3, s4
+# CHECK: vmov r1, r2, s2, s3
+
+0x1f 0x1b 0x42 0xec
+0x30 0x1b 0x42 0xec
+0x1f 0x1b 0x52 0xec
+0x30 0x1b 0x52 0xec
+# CHECK: vmov d15, r1, r2 
+# CHECK: vmov d16, r1, r2
+# CHECK: vmov r1, r2, d15
+# CHECK: vmov r1, r2, d16
+
 
 0xe9 0x1a 0xb2 0x4e
 # CHECK: vcvttmi.f32.f16	s2, s19
@@ -1869,14 +1931,6 @@
 # CHECK: vmov.f32	d0, #1.600000e+01
 # CHECK: vmov.f32	q0, #1.600000e+01
 
-# rdar://10798451
-0xe7 0xf9 0x32 0x1d
-# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
-0xe7 0xf9 0x3d 0x1d
-# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
-0xe7 0xf9 0x3f 0x1d
-# CHECK vld2.8	{d17[], d19[]}, [r7, :16]
-
 # rdar://11034702
 0x0d 0x87 0x04 0xf4
 # CHECK: vst1.8	{d8}, [r4]!            
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 65cd230..7d7010f 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple thumbv7-unknown-unknown -disassemble -mattr +fp16 < %s | FileCheck %s
+# RUN: llvm-mc -triple thumbv7-unknown-unknown -mcpu=cortex-a9 -disassemble < %s | FileCheck %s
 
 0xf1 0xff 0x20 0x03
 # CHECK: vabs.s8	d16, d16
@@ -1475,6 +1475,25 @@
 0xe0 0xf9 0xcf 0x1a
 # CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0]
 
+0xa4 0xf9 0x0f 0x0e
+# CHECK: vld3.8	{d0[], d1[], d2[]}, [r4]
+0xa4 0xf9 0x0d 0x0e
+# CHECK: vld3.8	{d0[], d1[], d2[]}, [r4]!
+0xa4 0xf9 0x25 0x0e
+# CHECK: vld3.8	{d0[], d2[], d4[]}, [r4], r5
+0xa4 0xf9 0x6f 0x0e
+# CHECK: vld3.16	{d0[], d2[], d4[]}, [r4]
+0xa4 0xf9 0x4d 0x0e
+# CHECK: vld3.16	{d0[], d1[], d2[]}, [r4]!
+0xa4 0xf9 0x65 0x0e
+# CHECK: vld3.16	{d0[], d2[], d4[]}, [r4], r5
+0xa4 0xf9 0x8f 0x0e
+# CHECK: vld3.32	{d0[], d1[], d2[]}, [r4]
+0xa4 0xf9 0x8d 0x0e
+# CHECK: vld3.32	{d0[], d1[], d2[]}, [r4]!
+0xa4 0xf9 0xa5 0x0e
+# CHECK: vld3.32	{d0[], d2[], d4[]}, [r4], r5
+
 0xe0 0xf9 0x3f 0x03
 # CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
 0xe0 0xf9 0x4f 0x07
@@ -1486,6 +1505,31 @@
 0xe0 0xf9 0x4f 0x1b
 # CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
+0xa4 0xf9 0x0f 0x0f
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4] 
+0xa4 0xf9 0x3f 0x0f
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32] 
+0xa4 0xf9 0x1d 0x0f
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4, :32]! 
+0xa4 0xf9 0x35 0x0f
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32], r5 
+0xa4 0xf9 0x4f 0x0f
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4] 
+0xa4 0xf9 0x7f 0x0f
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64] 
+0xa4 0xf9 0x5d 0x0f
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4, :64]! 
+0xa4 0xf9 0x75 0x0f
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64], r5 
+0xa4 0xf9 0x8f 0x0f
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4] 
+0xa4 0xf9 0xbf 0x0f
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :64] 
+0xa4 0xf9 0xdd 0x0f
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4, :128]! 
+0xa4 0xf9 0xf5 0x0f
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :128], r5 
+
 0x40 0xf9 0x1f 0x07
 # CHECK: vst1.8	{d16}, [r0, :64]
 0x40 0xf9 0x4f 0x07
@@ -1998,3 +2042,13 @@
 # CHECK: vld2.16	{d0[], d2[]}, [r3], r4  
 0xa3 0xf9 0xa4 0x0d
 # CHECK: vld2.32	{d0[], d2[]}, [r3], r4  
+
+
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK: vld2.8	{d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK: vld2.8	{d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK: vld2.8	{d17[], d19[]}, [r7, :16]
+
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index 18b8f47..c08585a 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mattr +t2xtpk,+mp | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mcpu=cortex-a9-mp | FileCheck %s
 
 # CHECK:	add	r5, sp, #68
 0x11 0xad
@@ -301,3 +301,11 @@
 
 # CHECK: mrs    r0, apsr
 0xef 0xf3 0x00 0x80
+
+# rdar://11313994
+# CHECK: blx	#2313244
+0x34 0xf2 0x0e 0xee
+
+# rdar://11324693
+# CHECK: bl	#-12303196
+0x44 0xf4 0x52 0xda
diff --git a/test/MC/Disassembler/ARM/thumb1.txt b/test/MC/Disassembler/ARM/thumb1.txt
index 17c4bad..5b70262 100644
--- a/test/MC/Disassembler/ARM/thumb1.txt
+++ b/test/MC/Disassembler/ARM/thumb1.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=thumbv6-apple-darwin -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=thumbv6-apple-darwin -disassemble -show-encoding < %s | FileCheck %s
 
 #------------------------------------------------------------------------------
 # ADC (register)
@@ -83,6 +83,15 @@
 0xb1 0x43
 
 #------------------------------------------------------------------------------
+# B
+#------------------------------------------------------------------------------
+# CHECK: bls     #128                    @ encoding: [0x40,0xd9]
+# CHECK: beq     #-256                   @ encoding: [0x80,0xd0]
+
+0x40 0xd9
+0x80 0xd0
+
+#------------------------------------------------------------------------------
 # BKPT
 #------------------------------------------------------------------------------
 # CHECK: bkpt #0
@@ -516,15 +525,3 @@
 
 0xd7 0xb2
 0xa1 0xb2
-
-
-#------------------------------------------------------------------------------
-# WFE/WFI/YIELD
-#------------------------------------------------------------------------------
-# CHECK: wfe
-# CHECK: wfi
-# CHECK: yield
-
-0x20 0xbf
-0x30 0xbf
-0x10 0xbf
diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt
index ed8d988..42ebe58 100644
--- a/test/MC/Disassembler/ARM/thumb2.txt
+++ b/test/MC/Disassembler/ARM/thumb2.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=thumbv7-apple-darwin -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -disassemble < %s | FileCheck %s
 
 #------------------------------------------------------------------------------
 # ADC (immediate)
@@ -92,9 +92,11 @@
 #------------------------------------------------------------------------------
 # CHECK: subw r11, pc, #3270
 # CHECK: subw r11, pc, #826
+# CHECK: subw r1, pc, #0
 
 0xaf 0xf6 0xc6 0x4b
 0xaf 0xf2 0x3a 0x3b
+0xaf 0xf2 0x00 0x01
 
 #------------------------------------------------------------------------------
 # AND (immediate)
@@ -344,23 +346,37 @@
 #------------------------------------------------------------------------------
 #CHECK: dmb sy
 #CHECK: dmb st
+#CHECK: dmb #0xd
+#CHECK: dmb #0xc
 #CHECK: dmb ish
 #CHECK: dmb ishst
+#CHECK: dmb #0x9
+#CHECK: dmb #0x8
 #CHECK: dmb nsh
 #CHECK: dmb nshst
+#CHECK: dmb #0x5
+#CHECK: dmb #0x4
 #CHECK: dmb osh
 #CHECK: dmb oshst
-#CHECK: dmb
+#CHECK: dmb #0x1
+#CHECK: dmb #0x0
 
 0xbf 0xf3 0x5f 0x8f
 0xbf 0xf3 0x5e 0x8f
+0xbf 0xf3 0x5d 0x8f
+0xbf 0xf3 0x5c 0x8f
 0xbf 0xf3 0x5b 0x8f
 0xbf 0xf3 0x5a 0x8f
+0xbf 0xf3 0x59 0x8f
+0xbf 0xf3 0x58 0x8f
 0xbf 0xf3 0x57 0x8f
 0xbf 0xf3 0x56 0x8f
+0xbf 0xf3 0x55 0x8f
+0xbf 0xf3 0x54 0x8f
 0xbf 0xf3 0x53 0x8f
 0xbf 0xf3 0x52 0x8f
-0xbf 0xf3 0x5f 0x8f
+0xbf 0xf3 0x51 0x8f
+0xbf 0xf3 0x50 0x8f
 
 
 #------------------------------------------------------------------------------
@@ -368,21 +384,37 @@
 #------------------------------------------------------------------------------
 #CHECK: dsb sy
 #CHECK: dsb st
+#CHECK: dsb #0xd
+#CHECK: dsb #0xc
 #CHECK: dsb ish
 #CHECK: dsb ishst
+#CHECK: dsb #0x9
+#CHECK: dsb #0x8
 #CHECK: dsb nsh
 #CHECK: dsb nshst
+#CHECK: dsb #0x5
+#CHECK: dsb #0x4
 #CHECK: dsb osh
 #CHECK: dsb oshst
+#CHECK: dsb #0x1
+#CHECK: dsb #0x0
 
 0xbf 0xf3 0x4f 0x8f
 0xbf 0xf3 0x4e 0x8f
+0xbf 0xf3 0x4d 0x8f
+0xbf 0xf3 0x4c 0x8f
 0xbf 0xf3 0x4b 0x8f
 0xbf 0xf3 0x4a 0x8f
+0xbf 0xf3 0x49 0x8f
+0xbf 0xf3 0x48 0x8f
 0xbf 0xf3 0x47 0x8f
 0xbf 0xf3 0x46 0x8f
+0xbf 0xf3 0x45 0x8f
+0xbf 0xf3 0x44 0x8f
 0xbf 0xf3 0x43 0x8f
 0xbf 0xf3 0x42 0x8f
+0xbf 0xf3 0x41 0x8f
+0xbf 0xf3 0x40 0x8f
 
 
 #------------------------------------------------------------------------------
@@ -609,6 +641,9 @@
 # CHECK: ldrd r3, r5, [r6], #-8
 # CHECK: ldrd r3, r5, [r6]
 # CHECK: ldrd r8, r1, [r3]
+# CHECK: ldrd r0, r1, [r2], #-0
+# CHECK: ldrd r0, r1, [r2, #-0]!
+# CHECK: ldrd r0, r1, [r2, #-0]
 
 0xd6 0xe9 0x06 0x35
 0xf6 0xe9 0x06 0x35
@@ -616,6 +651,9 @@
 0x76 0xe8 0x02 0x35
 0xd6 0xe9 0x00 0x35
 0xd3 0xe9 0x00 0x81
+0x72 0xe8 0x00 0x01
+0x72 0xe9 0x00 0x01
+0x52 0xe9 0x00 0x01
 
 
 #------------------------------------------------------------------------------
@@ -1790,12 +1828,16 @@
 # STRD (immediate)
 #------------------------------------------------------------------------------
 # CHECK: strd r6, r3, [r5], #-8
-# CHECK: strd r8, r5, [r5]{{$}}
+# CHECK: strd r8, r5, [r5], #-0
 # CHECK: strd r7, r4, [r5], #-4
+# CHECK: strd r0, r1, [r2, #-0]!
+# CHECK: strd r0, r1, [r2, #-0]
 
 0x65 0xe8 0x02 0x63
 0x65 0xe8 0x00 0x85
 0x65 0xe8 0x01 0x74
+0x62 0xe9 0x00 0x01
+0x42 0xe9 0x00 0x01
 
 #------------------------------------------------------------------------------
 # STREX/STREXB/STREXH/STREXD
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
index 275bae2f..d5c8cbb 100644
--- a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0x1f 0x12 0xb0 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
index 635b66e..d251eb4 100644
--- a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0xd1 0xf1 0x5f 0x01
diff --git a/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt b/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt
new file mode 100644
index 0000000..d0cb520
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt
@@ -0,0 +1,62 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=CHECK-WARN
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x03 0xaf 0x06
+# CHECK: sxtb
+0x74 0x03 0xaf 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xbf 0x06
+# CHECK: sxth
+0x74 0x3f 0xbf 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xa6 0x06
+# CHECK: sxtab
+0x74 0x3f 0xa6 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xb7 0x06
+# CHECK: sxtah
+0x74 0x3f 0xb7 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0x8f 0x06
+# CHECK: sxtb16
+0x74 0x3f 0x8f 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0x86 0x06
+# CHECK: sxtab16
+0x74 0x3f 0x86 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xef 0x06
+# CHECK: uxtb
+0x74 0x3f 0xef 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xff 0x06
+# CHECK: uxth
+0x74 0x3f 0xff 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xcf 0x06
+# CHECK: uxtb16
+0x74 0x3f 0xcf 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xe4 0x06
+# CHECK: uxtab
+0x74 0x3f 0xe4 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xf2 0x06
+# CHECK: uxtah
+0x74 0x3f 0xf2 0x06
+
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x74 0x3f 0xc4 0x06
+# CHECK: uxtab16
+0x74 0x3f 0xc4 0x06
diff --git a/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt b/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt
index dac4390..554ae53 100644
--- a/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0x01 0x10 0x50 0x03
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
index ed5e350..66073a8 100644
--- a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0xff 0x00 0xb9 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
index a8f54f7..572d844 100644
--- a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
 # -------------------------------------------------------------------------------------------------
diff --git a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
index f7d6bc6..9c26953 100644
--- a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt
index 26b286d..439aaed 100644
--- a/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0x00 0x10 0x51 0xfc
diff --git a/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt
index 3e472cd..d785341 100644
--- a/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # CHECK: warning: potentially undefined
 # CHECK: 0x00 0xf0 0x0f 0x01
diff --git a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
index 3db86cc..472868f 100644
--- a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0x93 0x12 0x01 0x00 
diff --git a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
index 5b13610..fdfda6d 100644
--- a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt
new file mode 100644
index 0000000..a2a8770
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0xb4 0x38 0x80 0x06
+0xb4 0x38 0x80 0x06
diff --git a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
index 8ec49ca..741d059 100644
--- a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # CHECK: warning: potentially undefined
 # CHECK: shadd16	r5, r7, r0
diff --git a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
index 874378e..832aa3f 100644
--- a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
index fef6125..5e62802 100644
--- a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
index 4c4c9ab..85b52dd 100644
--- a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
 
 # Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
diff --git a/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt b/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt
index 64bb171..eef5d9f 100644
--- a/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
 
 # CHECK: potentially undefined
 # CHECK: 0x9f 0x10 0x03 0x01
diff --git a/test/MC/Disassembler/ARM/unpredictables-thumb.txt b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
index e7645f0..925dcd3 100644
--- a/test/MC/Disassembler/ARM/unpredictables-thumb.txt
+++ b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=thumbv7 2>&1 | FileCheck %s
 
 0x01 0x47
 # CHECK: 3:1: warning: potentially undefined
diff --git a/test/MC/Disassembler/Mips/lit.local.cfg b/test/MC/Disassembler/Mips/lit.local.cfg
new file mode 100644
index 0000000..9b698b2
--- /dev/null
+++ b/test/MC/Disassembler/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index 591d8c4..a193319 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -1,421 +1,406 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x05
 
-# CHECK: abs.d $f12,$f14
-0x46 0x20 0x39 0x85
-
-# CHECK: abs.s $f6,$f7
+# CHECK: abs.s $f6, $f7
 0x46 0x00 0x39 0x85
 
-# CHECK: add t1,a2,a3
+# CHECK: add $9, $6, $7
 0x00 0xc7 0x48 0x20
 
-# CHECK: add.d $f18,$f12,$f14
-0x46 0x27 0x32 0x40
+# CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x00
 
-# CHECK: add.s $f9,$f6,$f7
+# CHECK: add.s $f9, $f6, $f7
 0x46 0x07 0x32 0x40
 
-# CHECK: addi t1,a2,17767
+# CHECK: addi $9, $6, 17767
 0x20 0xc9 0x45 0x67
 
-# CHECK: addiu t1,a2,-15001
+# CHECK: addiu $9, $6, -15001
 0x24 0xc9 0xc5 0x67
 
-# CHECK: addu t1,a2,a3
+# CHECK: addu $9, $6, $7
 0x00 0xc7 0x48 0x21
 
-# CHECK: and t1,a2,a3
+# CHECK: and $9, $6, $7
 0x00 0xc7 0x48 0x24
 
-# CHECK: andi t1,a2,0x4567
+# CHECK: andi $9, $6, 17767
 0x30 0xc9 0x45 0x67
 
-# CHECK: b 00000534
+# CHECK: b 1332
 0x10 0x00 0x01 0x4c
 
-# CHECK: bal 00000534
-0x04 0x11 0x01 0x4c
-
-# CHECK: bc1f 00000534
+# CHECK: bc1f 1332
 0x45 0x00 0x01 0x4c
 
-# CHECK: bc1t 00000534
+# CHECK: bc1t 1332
 0x45 0x01 0x01 0x4c
 
-# CHECK: beq t1,a2,00000534
+# CHECK: beq $9, $6, 1332
 0x11 0x26 0x01 0x4c
 
-# CHECK: bgez a2,00000534
+# CHECK: bgez  $6, 1332
 0x04 0xc1 0x01 0x4c
 
-# CHECK: bgezal a2,00000534
+# CHECK: bgezal  $6, 1332
 0x04 0xd1 0x01 0x4c
 
-# CHECK: bgtz a2,00000534
+# CHECK: bgtz  $6, 1332
 0x1c 0xc0 0x01 0x4c
 
-# CHECK: blez a2,00000534
+# CHECK: blez  $6, 1332
 0x18 0xc0 0x01 0x4c
 
-# CHECK: bne t1,a2,00000534
+# CHECK: bne $9, $6, 1332
 0x15 0x26 0x01 0x4c
 
-# CHECK: c.eq.d $f12,$f14
-0x46 0x27 0x30 0x32
+# CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x32
 
-# CHECK: c.eq.s $f6,$f7
+# CHECK: c.eq.s $f6, $f7
 0x46 0x07 0x30 0x32
 
-# CHECK: c.f.d $f12,$f14
-0x46 0x27 0x30 0x30
+# CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x30
 
-# CHECK: c.f.s $f6,$f7
+# CHECK: c.f.s $f6, $f7
 0x46 0x07 0x30 0x30
 
-# CHECK: c.le.d $f12,$f14
-0x46 0x27 0x30 0x3e
+# CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3e
 
-# CHECK: c.le.s $f6,$f7
+# CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3e
 
-# CHECK: c.lt.d $f12,$f14
-0x46 0x27 0x30 0x3c
+# CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3c
 
-# CHECK: c.lt.s $f6,$f7
+# CHECK: c.lt.s $f6, $f7
 0x46 0x07 0x30 0x3c
 
-# CHECK: c.nge.d $f12,$f14
-0x46 0x27 0x30 0x3d
+# CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3d
 
-# CHECK: c.nge.s $f6,$f7
+# CHECK: c.nge.s $f6, $f7
 0x46 0x07 0x30 0x3d
 
-# CHECK: c.ngl.d $f12,$f14
-0x46 0x27 0x30 0x3b
+# CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3b
 
-# CHECK: c.ngl.s $f6,$f7
+# CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3b
 
-# CHECK: c.ngle.d $f12,$f14
-0x46 0x27 0x30 0x39
+# CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x39
 
-# CHECK: c.ngle.s $f6,$f7
+# CHECK: c.ngle.s $f6, $f7
 0x46 0x07 0x30 0x39
 
-# CHECK: c.ngt.d $f12,$f14
-0x46 0x27 0x30 0x3f
+# CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x60 0x3f
 
-# CHECK: c.ngt.s $f6,$f7
+# CHECK: c.ngt.s $f6, $f7
 0x46 0x07 0x30 0x3f
 
-# CHECK: c.ole.d $f12,$f14
-0x46 0x27 0x30 0x36
+# CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x36
 
-# CHECK: c.ole.s $f6,$f7
+# CHECK: c.ole.s $f6, $f7
 0x46 0x07 0x30 0x36
 
-# CHECK: c.olt.d $f12,$f14
-0x46 0x27 0x30 0x34
+# CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x34
 
-# CHECK: c.olt.s $f6,$f7
+# CHECK: c.olt.s $f6, $f7
 0x46 0x07 0x30 0x34
 
-# CHECK: c.seq.d $f12,$f14
-0x46 0x27 0x30 0x3a
+# CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3a
 
-# CHECK: c.seq.s $f6,$f7
+# CHECK: c.seq.s $f6, $f7
 0x46 0x07 0x30 0x3a
 
-# CHECK: c.sf.d $f12,$f14
-0x46 0x27 0x30 0x38
+# CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x38
 
-# CHECK: c.sf.s $f6,$f7
+# CHECK: c.sf.s $f6, $f7
 0x46 0x07 0x30 0x38
 
-# CHECK: c.ueq.d $f12,$f14
-0x46 0x27 0x30 0x33
+# CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x33
 
-# CHECK: c.ueq.s $f28,$f18
+# CHECK: c.ueq.s $f28, $f18
 0x46 0x12 0xe0 0x33
 
-# CHECK: c.ule.d $f12,$f14
-0x46 0x27 0x30 0x37
+# CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x37
 
-# CHECK: c.ule.s $f6,$f7
+# CHECK: c.ule.s $f6, $f7
 0x46 0x07 0x30 0x37
 
-# CHECK: c.ult.d $f12,$f14
-0x46 0x27 0x30 0x35
+# CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x35
 
-# CHECK: c.ult.s $f6,$f7
+# CHECK: c.ult.s $f6, $f7
 0x46 0x07 0x30 0x35
 
-# CHECK: c.un.d $f12,$f14
-0x46 0x27 0x30 0x31
+# CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x31
 
-# CHECK: c.un.s $f6,$f7
+# CHECK: c.un.s $f6, $f7
 0x46 0x07 0x30 0x31
 
-# CHECK: ceil.w.d $f12,$f14
-0x46 0x20 0x39 0x8e
+# CHECK: ceil.w.d $f12, $f14
+0x46 0x20 0x73 0x0e
 
-# CHECK: ceil.w.s $f6,$f7
+# CHECK: ceil.w.s $f6, $f7
 0x46 0x00 0x39 0x8e
 
-# CHECK: cfc1 a2,$7
+# CHECK: cfc1  $6, $7
 0x44 0x46 0x38 0x00
 
-# CHECK: clo a2,a3
+# CHECK: clo  $6, $7
 0x70 0xe6 0x30 0x21
 
-# CHECK: clz a2,a3
+# CHECK: clz  $6, $7
 0x70 0xe6 0x30 0x20
 
-# CHECK: ctc1 a2,$7
+# CHECK: ctc1  $6, $7
 0x44 0xc6 0x38 0x00
 
-# CHECK: cvt.d.s $f6,$f7
-0x46 0x00 0x38 0xa1
-
-# CHECK: cvt.d.w $f12,$f14
-0x46 0x80 0x38 0xa1
-
-# CHECK: cvt.l.d $f12,$f14
-0x46 0x20 0x39 0xa5
+# CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa1
 
-# CHECK: cvt.l.s $f6,$f7
-0x46 0x00 0x39 0xa5
+# CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x73 0x21
 
-# CHECK: cvt.s.d $f12,$f14
-0x46 0x20 0x39 0xa0
+# CHECK: cvt.s.d $f12, $f14
+0x46 0x20 0x73 0x20
 
-# CHECK: cvt.s.w $f6,$f7
+# CHECK: cvt.s.w $f6, $f7
 0x46 0x80 0x39 0xa0
 
-# CHECK: cvt.w.d $f12,$f14
-0x46 0x20 0x39 0xa4
+# CHECK: cvt.w.d $f12, $f14
+0x46 0x20 0x73 0x24
 
-# CHECK: cvt.w.s $f6,$f7
+# CHECK: cvt.w.s $f6, $f7
 0x46 0x00 0x39 0xa4
 
-# CHECK: floor.w.d $f12,$f14
-0x46 0x20 0x39 0x8f
+# CHECK: floor.w.d $f12, $f14
+0x46 0x20 0x73 0x0f
 
-# CHECK: floor.w.s $f6,$f7
+# CHECK: floor.w.s $f6, $f7
 0x46 0x00 0x39 0x8f
 
-# CHECK: j 00000530
+# CHECK: j 1328
 0x08 0x00 0x01 0x4c
 
-# CHECK: jal 00000530
+# CHECK: jal 1328
 0x0c 0x00 0x01 0x4c
 
-# CHECK: jalr a2,a3
+# CHECK: jalr  $7
 0x00 0xe0 0xf8 0x09
 
-# CHECK: jr a3
+# CHECK: jr  $7
 0x00 0xe0 0x00 0x08
 
-# CHECK: lb  a0,9158(a1)
+# CHECK: lb  $4, 9158($5)
 0x80 0xa4 0x23 0xc6
 
-# CHECK: lbu a0,6(a1)
+# CHECK: lbu $4, 6($5)
 0x90 0xa4 0x00 0x06
 
-# CHECK: ldc1  $f9,9158(a3)
+# CHECK: ldc1  $f9, 9158($7)
 0xd4 0xe9 0x23 0xc6
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x84 0xa4 0x00 0x0c
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x84 0xa4 0x00 0x0c
 
-# CHECK: li  v1,17767
-0x24 0x03 0x45 0x67
-
-# CHECK: ll  t1,9158(a3)
+# CHECK: ll  $9, 9158($7)
 0xc0 0xe9 0x23 0xc6
 
-# CHECK: lui a2,0x4567
+# CHECK: lui  $6, 17767
 0x3c 0x06 0x45 0x67
 
-# CHECK: lw  a0,24(a1)
+# CHECK: lw  $4, 24($5)
 0x8c 0xa4 0x00 0x18
 
-# CHECK: lwc1  $f9,9158(a3)
+# CHECK: lwc1  $f9, 9158($7)
 0xc4 0xe9 0x23 0xc6
 
-# CHECK: madd  a2,a3
+# CHECK: lwl   $2,  3($4)
+0x88 0x82 0x00 0x03
+
+# CHECK: lwr   $3, 16($5)
+0x98 0xa3 0x00 0x10
+
+# CHECK: madd   $6,  $7
 0x70 0xc7 0x00 0x00
 
-# CHECK: maddu a2,a3
+# CHECK: maddu  $6,  $7
 0x70 0xc7 0x00 0x01
 
-# CHECK: mfc1  a2,$f7
+# CHECK: mfc1   $6, $f7
 0x44 0x06 0x38 0x00
 
-# CHECK: mfhi  a1
+# CHECK: mfhi  $5
 0x00 0x00 0x28 0x10
 
-# CHECK: mflo  a1
+# CHECK: mflo  $5
 0x00 0x00 0x28 0x12
 
-# CHECK: mov.d $f6,$f7
-0x46 0x20 0x39 0x86
+# CHECK: mov.d $f6, $f8
+0x46 0x20 0x41 0x86
 
-# CHECK: mov.s $f6,$f7
+# CHECK: mov.s $f6, $f7
 0x46 0x00 0x39 0x86
 
-# CHECK: move  a2,a1
-0x00 0xa0 0x30 0x21
-
-# CHECK: msub  a2,a3
+# CHECK: msub   $6,  $7
 0x70 0xc7 0x00 0x04
 
-# CHECK: msubu a2,a3
+# CHECK: msubu  $6,  $7
 0x70 0xc7 0x00 0x05
 
-# CHECK: mtc1  a2,$f7
+# CHECK: mtc1   $6, $f7
 0x44 0x86 0x38 0x00
 
-# CHECK: mthi  a3
+# CHECK: mthi   $7
 0x00 0xe0 0x00 0x11
 
-# CHECK: mtlo  a3
+# CHECK: mtlo   $7
 0x00 0xe0 0x00 0x13
 
-# CHECK: mul.d $f9,$f12,$f14
-0x46 0x27 0x32 0x42
+# CHECK: mul.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02
 
-# CHECK: mul.s $f9,$f6,$f7
+# CHECK: mul.s $f9, $f6, $f7
 0x46 0x07 0x32 0x42
 
-# CHECK: mul t1,a2,a3
+# CHECK: mul $9,  $6,  $7
 0x70 0xc7 0x48 0x02
 
-# CHECK: mult  v1,a1
+# CHECK: mult  $3, $5
 0x00 0x65 0x00 0x18
 
-# CHECK: multu v1,a1
+# CHECK: multu $3, $5
 0x00 0x65 0x00 0x19
 
-# CHECK: neg.d $f12,$f14
-0x46 0x20 0x39 0x87
+# CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x07
 
-# CHECK: neg.s $f6,$f7
+# CHECK: neg.s $f6, $f7
 0x46 0x00 0x39 0x87
 
-# CHECK: neg v1,a1
-0x00 0x05 0x18 0x22
-
 # CHECK: nop
 0x00 0x00 0x00 0x00
 
-# CHECK: nor t1,a2,a3
+# CHECK: nor $9,  $6, $7
 0x00 0xc7 0x48 0x27
 
-# CHECK: not v1,a1
-0x00 0xa0 0x18 0x27
-
-# CHECK: or  v1,v1,a1
+# CHECK: or  $3, $3, $5
 0x00 0x65 0x18 0x25
 
-# CHECK: ori t1,a2,0x4567
+# CHECK: ori $9,  $6, 17767
 0x34 0xc9 0x45 0x67
 
-# CHECK: rdhwr a2,$29
-0x7c 0x06 0xe8 0x3b
+# CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0c
 
-# CHECK: round.w.d $f12,$f14
-0x46 0x20 0x39 0x8c
-
-# CHECK: round.w.s $f6,$f7
+# CHECK: round.w.s $f6, $f7
 0x46 0x00 0x39 0x8c
 
-# CHECK: sb  a0,9158(a1)
+# CHECK: sb  $4, 9158($5)
 0xa0 0xa4 0x23 0xc6
 
-# CHECK: sb  a0,6(a1)
+# CHECK: sb  $4, 6($5)
 0xa0 0xa4 0x00 0x06
 
-# CHECK: sc  t1,9158(a3)
+# CHECK: sc  $9, 9158($7)
 0xe0 0xe9 0x23 0xc6
 
-# CHECK: sdc1  $f9,9158(a3)
+# CHECK: sdc1  $f9, 9158($7)
 0xf4 0xe9 0x23 0xc6
 
-# CHECK: sh  a0,9158(a1)
+# CHECK: sh  $4, 9158($5)
 0xa4 0xa4 0x23 0xc6
 
-# CHECK: sll a0,v1,0x7
+# CHECK: sll $4, $3, 7
 0x00 0x03 0x21 0xc0
 
-# CHECK: sllv  v0,v1,a1
+# CHECK: sllv  $2, $3, $5
 0x00 0xa3 0x10 0x04
 
-# CHECK: slt v1,v1,a1
+# CHECK: slt $3, $3, $5
 0x00 0x65 0x18 0x2a
 
-# CHECK: slti  v1,v1,103
+# CHECK: slti  $3, $3, 103
 0x28 0x63 0x00 0x67
 
-# CHECK: sltiu v1,v1,103
+# CHECK: sltiu $3, $3, 103
 0x2c 0x63 0x00 0x67
 
-# CHECK: sltu  v1,v1,a1
+# CHECK: sltu  $3, $3, $5
 0x00 0x65 0x18 0x2b
 
-# CHECK: sqrt.d  $f12,$f14
-0x46 0x20 0x39 0x84
+# CHECK: sqrt.d  $f12, $f14
+0x46 0x20 0x73 0x04
 
-# CHECK: sqrt.s  $f6,$f7
+# CHECK: sqrt.s  $f6, $f7
 0x46 0x00 0x39 0x84
 
-# CHECK: sra a0,v1,0x7
+# CHECK: sra $4, $3, 7
 0x00 0x03 0x21 0xc3
 
-# CHECK: sra a0,v1,0x7
-0x00 0x03 0x21 0xc3
-
-# CHECK: srav  v0,v1,a1
+# CHECK: srav  $2, $3, $5
 0x00 0xa3 0x10 0x07
 
-# CHECK: srl a0,v1,0x7
+# CHECK: srl $4, $3, 7
 0x00 0x03 0x21 0xc2
 
-# CHECK: srlv  v0,v1,a1
+# CHECK: srlv  $2, $3, $5
 0x00 0xa3 0x10 0x06
 
-# CHECK: sub.d $f9,$f12,$f14
-0x46 0x27 0x32 0x41
+# CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01
 
-# CHECK: sub.s $f9,$f6,$f7
+# CHECK: sub.s $f9, $f6, $f7
 0x46 0x07 0x32 0x41
 
-# CHECK: sub t1,a2,a3
+# CHECK: sub $9,  $6, $7
 0x00 0xc7 0x48 0x22
 
-# CHECK: subu  a0,v1,a1
+# CHECK: subu  $4, $3, $5
 0x00 0x65 0x20 0x23
 
-# CHECK: sw  a0,24(a1)
+# CHECK: sw  $4, 24($5)
 0xac 0xa4 0x00 0x18
 
-# CHECK: swc1  $f9,9158(a3)
+# CHECK: swc1  $f9, 9158($7)
 0xe4 0xe9 0x23 0xc6
 
-# CHECK: sync  0x7
+# CHECK: swl $4,  16($5)
+0xa8 0xa4 0x00 0x10
+
+# CHECK: swr $6, 16($7)
+0xb8 0xe6 0x00 0x10
+
+# CHECK: sync  7
 0x00 0x00 0x01 0xcf
 
-# CHECK: trunc.w.d $f12,$f14
-0x46 0x20 0x39 0x8d
+# CHECK: trunc.w.d $f12, $f14
+0x46 0x20 0x73 0x0d
 
-# CHECK: trunc.w.s $f6,$f7
+# CHECK: trunc.w.s $f6, $f7
 0x46 0x00 0x39 0x8d
 
-# CHECK: xor v1,v1,a1
+# CHECK: xor $3, $3, $5
 0x00 0x65 0x18 0x26
 
-# CHECK: xori  t1,a2,0x4567
+# CHECK: xori  $9,  $6, 17767
 0x38 0xc9 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index a5a3cfd..08b3672 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -1,424 +1,406 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: abs.d $f12, $f14
+0x05 0x73 0x20 0x46
 
-# CHECK: abs.d $f12,$f14
-0x85 0x39 0x20 0x46
-
-# CHECK: abs.s $f6,$f7
+# CHECK: abs.s $f6, $f7
 0x85 0x39 0x00 0x46
 
-# CHECK: add t1,a2,a3
+# CHECK: add $9, $6, $7
 0x20 0x48 0xc7 0x00
 
-# CHECK: add.d $f18,$f12,$f14
-0x40 0x32 0x27 0x46
+# CHECK: add.d $f8, $f12, $f14
+0x00 0x62 0x2e 0x46
 
-# CHECK: add.s $f9,$f6,$f7
+# CHECK: add.s $f9, $f6, $f7
 0x40 0x32 0x07 0x46
 
-# CHECK: addi t1,a2,17767
+# CHECK: addi $9, $6, 17767
 0x67 0x45 0xc9 0x20
 
-# CHECK: addiu t1,a2,-15001
+# CHECK: addiu $9, $6, -15001
 0x67 0xc5 0xc9 0x24
 
-# CHECK: addu t1,a2,a3
+# CHECK: addu $9, $6, $7
 0x21 0x48 0xc7 0x00
 
-# CHECK: and t1,a2,a3
+# CHECK: and $9, $6, $7
 0x24 0x48 0xc7 0x00
 
-# CHECK: andi t1,a2,0x4567
+# CHECK: andi $9, $6, 17767
 0x67 0x45 0xc9 0x30
 
-# CHECK: b 00000534
+# CHECK: b 1332
 0x4c 0x01 0x00 0x10
 
-# CHECK: bal 00000534
-0x4c 0x01 0x11 0x04
-
-# CHECK: bc1f 00000534
+# CHECK: bc1f 1332
 0x4c 0x01 0x00 0x45
 
-# CHECK: bc1t 00000534
+# CHECK: bc1t 1332
 0x4c 0x01 0x01 0x45
 
-# CHECK: beq t1,a2,00000534
+# CHECK: beq $9, $6, 1332
 0x4c 0x01 0x26 0x11
 
-# CHECK: bgez a2,00000534
+# CHECK: bgez  $6, 1332
 0x4c 0x01 0xc1 0x04
 
-# CHECK: bgezal a2,00000534
+# CHECK: bgezal  $6, 1332
 0x4c 0x01 0xd1 0x04
 
-# CHECK: bgtz a2,00000534
+# CHECK: bgtz  $6, 1332
 0x4c 0x01 0xc0 0x1c
 
-# CHECK: blez a2,00000534
+# CHECK: blez  $6, 1332
 0x4c 0x01 0xc0 0x18
 
-# CHECK: bne t1,a2,00000534
+# CHECK: bne $9, $6, 1332
 0x4c 0x01 0x26 0x15
 
-# CHECK: c.eq.d $f12,$f14
-0x32 0x30 0x27 0x46
+# CHECK: c.eq.d $f12, $f14
+0x32 0x60 0x2e 0x46
 
-# CHECK: c.eq.s $f6,$f7
+# CHECK: c.eq.s $f6, $f7
 0x32 0x30 0x07 0x46
 
-# CHECK: c.f.d $f12,$f14
-0x30 0x30 0x27 0x46
+# CHECK: c.f.d $f12, $f14
+0x30 0x60 0x2e 0x46
 
-# CHECK: c.f.s $f6,$f7
+# CHECK: c.f.s $f6, $f7
 0x30 0x30 0x07 0x46
 
-# CHECK: c.le.d $f12,$f14
-0x3e 0x30 0x27 0x46
+# CHECK: c.le.d $f12, $f14
+0x3e 0x60 0x2e 0x46
 
-# CHECK: c.le.s $f6,$f7
+# CHECK: c.le.s $f6, $f7
 0x3e 0x30 0x07 0x46
 
-# CHECK: c.lt.d $f12,$f14
-0x3c 0x30 0x27 0x46
+# CHECK: c.lt.d $f12, $f14
+0x3c 0x60 0x2e 0x46
 
-# CHECK: c.lt.s $f6,$f7
+# CHECK: c.lt.s $f6, $f7
 0x3c 0x30 0x07 0x46
 
-# CHECK: c.nge.d $f12,$f14
-0x3d 0x30 0x27 0x46
+# CHECK: c.nge.d $f12, $f14
+0x3d 0x60 0x2e 0x46
 
-# CHECK: c.nge.s $f6,$f7
+# CHECK: c.nge.s $f6, $f7
 0x3d 0x30 0x07 0x46
 
-# CHECK: c.ngl.d $f12,$f14
-0x3b 0x30 0x27 0x46
+# CHECK: c.ngl.d $f12, $f14
+0x3b 0x60 0x2e 0x46
 
-# CHECK: c.ngl.s $f6,$f7
+# CHECK: c.ngl.s $f6, $f7
 0x3b 0x30 0x07 0x46
 
-# CHECK: c.ngle.d $f12,$f14
-0x39 0x30 0x27 0x46
+# CHECK: c.ngle.d $f12, $f14
+0x39 0x60 0x2e 0x46
 
-# CHECK: c.ngle.s $f6,$f7
+# CHECK: c.ngle.s $f6, $f7
 0x39 0x30 0x07 0x46
 
-# CHECK: c.ngt.d $f12,$f14
-0x3f 0x30 0x27 0x46
+# CHECK: c.ngt.d $f12, $f14
+0x3f 0x60 0x2e 0x46
 
-# CHECK: c.ngt.s $f6,$f7
+# CHECK: c.ngt.s $f6, $f7
 0x3f 0x30 0x07 0x46
 
-# CHECK: c.ole.d $f12,$f14
-0x36 0x30 0x27 0x46
+# CHECK: c.ole.d $f12, $f14
+0x36 0x60 0x2e 0x46
 
-# CHECK: c.ole.s $f6,$f7
+# CHECK: c.ole.s $f6, $f7
 0x36 0x30 0x07 0x46
 
-# CHECK: c.olt.d $f12,$f14
-0x34 0x30 0x27 0x46
+# CHECK: c.olt.d $f12, $f14
+0x34 0x60 0x2e 0x46
 
-# CHECK: c.olt.s $f6,$f7
+# CHECK: c.olt.s $f6, $f7
 0x34 0x30 0x07 0x46
 
-# CHECK: c.seq.d $f12,$f14
-0x3a 0x30 0x27 0x46
+# CHECK: c.seq.d $f12, $f14
+0x3a 0x60 0x2e 0x46
 
-# CHECK: c.seq.s $f6,$f7
+# CHECK: c.seq.s $f6, $f7
 0x3a 0x30 0x07 0x46
 
-# CHECK: c.sf.d $f12,$f14
-0x38 0x30 0x27 0x46
+# CHECK: c.sf.d $f12, $f14
+0x38 0x60 0x2e 0x46
 
-# CHECK: c.sf.s $f6,$f7
+# CHECK: c.sf.s $f6, $f7
 0x38 0x30 0x07 0x46
 
-# CHECK: c.ueq.d $f12,$f14
-0x33 0x30 0x27 0x46
+# CHECK: c.ueq.d $f12, $f14
+0x33 0x60 0x2e 0x46
 
-# CHECK: c.ueq.s $f28,$f18
+# CHECK: c.ueq.s $f28, $f18
 0x33 0xe0 0x12 0x46
 
-# CHECK: c.ule.d $f12,$f14
-0x37 0x30 0x27 0x46
+# CHECK: c.ule.d $f12, $f14
+0x37 0x60 0x2e 0x46
 
-# CHECK: c.ule.s $f6,$f7
+# CHECK: c.ule.s $f6, $f7
 0x37 0x30 0x07 0x46
 
-# CHECK: c.ult.d $f12,$f14
-0x35 0x30 0x27 0x46
+# CHECK: c.ult.d $f12, $f14
+0x35 0x60 0x2e 0x46
 
-# CHECK: c.ult.s $f6,$f7
+# CHECK: c.ult.s $f6, $f7
 0x35 0x30 0x07 0x46
 
-# CHECK: c.un.d $f12,$f14
-0x31 0x30 0x27 0x46
+# CHECK: c.un.d $f12, $f14
+0x31 0x60 0x2e 0x46
 
-# CHECK: c.un.s $f6,$f7
+# CHECK: c.un.s $f6, $f7
 0x31 0x30 0x07 0x46
 
-# CHECK: ceil.w.d $f12,$f14
-0x8e 0x38 0x20 0x46
+# CHECK: ceil.w.d $f12, $f14
+0x0e 0x73 0x20 0x46
 
-# CHECK: ceil.w.s $f6,$f7
-0x8e 0x38 0x00 0x46
+# CHECK: ceil.w.s $f6, $f7
+0x8e 0x39 0x00 0x46
 
-# CHECK: cfc1 a2,$7
+# CHECK: cfc1  $6, $7
 0x00 0x38 0x46 0x44
 
-# CHECK: clo a2,a3
+# CHECK: clo  $6, $7
 0x21 0x30 0xe6 0x70
 
-# CHECK: clz a2,a3
+# CHECK: clz  $6, $7
 0x20 0x30 0xe6 0x70
 
-# CHECK: ctc1 a2,$7
+# CHECK: ctc1  $6, $7
 0x00 0x38 0xc6 0x44
 
-# CHECK: cvt.d.s $f6,$f7
+# CHECK: cvt.d.s $f6, $f7
 0xa1 0x39 0x00 0x46
 
-# CHECK: cvt.d.w $f12,$f14
-0xa1 0x39 0x80 0x46
-
-# CHECK: cvt.l.d $f12,$f14
-0xa5 0x39 0x20 0x46
-
-# CHECK: cvt.l.s $f6,$f7
-0xa5 0x39 0x00 0x46
+# CHECK: cvt.d.w $f12, $f14
+0x21 0x73 0x80 0x46
 
-# CHECK: cvt.s.d $f12,$f14
-0xa0 0x39 0x20 0x46
+# CHECK: cvt.s.d $f12, $f14
+0x20 0x73 0x20 0x46
 
-# CHECK: cvt.s.w $f6,$f7
+# CHECK: cvt.s.w $f6, $f7
 0xa0 0x39 0x80 0x46
 
-# CHECK: cvt.w.d $f12,$f14
-0xa4 0x39 0x20 0x46
+# CHECK: cvt.w.d $f12, $f14
+0x24 0x73 0x20 0x46
 
-# CHECK: cvt.w.s $f6,$f7
+# CHECK: cvt.w.s $f6, $f7
 0xa4 0x39 0x00 0x46
 
-# CHECK: floor.w.d $f12,$f14
-0x8f 0x39 0x20 0x46
+# CHECK: floor.w.d $f12, $f14
+0x0f 0x73 0x20 0x46
 
-# CHECK: floor.w.s $f6,$f7
+# CHECK: floor.w.s $f6, $f7
 0x8f 0x39 0x00 0x46
 
-# CHECK: j 00000530
+# CHECK: j 1328
 0x4c 0x01 0x00 0x08
 
-# CHECK: jal 00000530
+# CHECK: jal 1328
 0x4c 0x01 0x00 0x0c
 
-# CHECK: jalr a2,a3
+# CHECK: jalr  $7
 0x09 0xf8 0xe0 0x00
 
-# CHECK: jr a3
+# CHECK: jr  $7
 0x08 0x00 0xe0 0x00
 
-# CHECK: lb  a0,9158(a1)
+# CHECK: lb  $4, 9158($5)
 0xc6 0x23 0xa4 0x80
 
-# CHECK: lbu a0,6(a1)
+# CHECK: lbu $4, 6($5)
 0x06 0x00 0xa4 0x90
 
-# CHECK: ldc1  $f9,9158(a3)
+# CHECK: ldc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xd4
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x0c 0x00 0xa4 0x84
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x0c 0x00 0xa4 0x84
 
-# CHECK: li  v1,17767
-0x67 0x45 0x03 0x24
-
-# CHECK: ll  t1,9158(a3)
+# CHECK: ll  $9, 9158($7)
 0xc6 0x23 0xe9 0xc0
 
-# CHECK: lui a2,0x4567
+# CHECK: lui  $6, 17767
 0x67 0x45 0x06 0x3c
 
-# CHECK: lw  a0,24(a1)
+# CHECK: lw  $4, 24($5)
 0x18 0x00 0xa4 0x8c
 
-# CHECK lw at,-18316(v0)
-0x74 0xb8 0x41 0x8c
-
-# CHECK: lwc1  $f9,9158(a3)
+# CHECK: lwc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xc4
 
-# CHECK: madd  a2,a3
+# CHECK: lwl   $2,  3($4)
+0x03 0x00 0x82 0x88
+
+# CHECK: lwr   $3, 16($5)
+0x10 0x00 0xa3 0x98
+
+# CHECK: madd   $6,  $7
 0x00 0x00 0xc7 0x70
 
-# CHECK: maddu a2,a3
+# CHECK: maddu  $6,  $7
 0x01 0x00 0xc7 0x70
 
-# CHECK: mfc1  a2,$f7
+# CHECK: mfc1   $6, $f7
 0x00 0x38 0x06 0x44
 
-# CHECK: mfhi  a1
+# CHECK: mfhi  $5
 0x10 0x28 0x00 0x00
 
-# CHECK: mflo  a1
+# CHECK: mflo  $5
 0x12 0x28 0x00 0x00
 
-# CHECK: mov.d $f12,$f14
-0x86 0x39 0x20 0x46
+# CHECK: mov.d $f6, $f8
+0x86 0x41 0x20 0x46
 
-# CHECK: mov.s $f6,$f7
+# CHECK: mov.s $f6, $f7
 0x86 0x39 0x00 0x46
 
-# CHECK: move  a2,a1
-0x21 0x30 0xa0 0x00
-
-# CHECK: msub  a2,a3
+# CHECK: msub   $6,  $7
 0x04 0x00 0xc7 0x70
 
-# CHECK: msubu a2,a3
+# CHECK: msubu  $6,  $7
 0x05 0x00 0xc7 0x70
 
-# CHECK: mtc1  a2,$f7
+# CHECK: mtc1   $6, $f7
 0x00 0x38 0x86 0x44
 
-# CHECK: mthi  a3
+# CHECK: mthi   $7
 0x11 0x00 0xe0 0x00
 
-# CHECK: mtlo  a3
+# CHECK: mtlo   $7
 0x13 0x00 0xe0 0x00
 
-# CHECK: mul.d $f9,$f12,$f14
-0x42 0x32 0x27 0x46
+# CHECK: mul.d $f8, $f12, $f14
+0x02 0x62 0x2e 0x46
 
-# CHECK: mul.s $f9,$f6,$f7
+# CHECK: mul.s $f9, $f6, $f7
 0x42 0x32 0x07 0x46
 
-# CHECK: mul t1,a2,a3
+# CHECK: mul $9,  $6,  $7
 0x02 0x48 0xc7 0x70
 
-# CHECK: mult  v1,a1
+# CHECK: mult  $3, $5
 0x18 0x00 0x65 0x00
 
-# CHECK: multu v1,a1
+# CHECK: multu $3, $5
 0x19 0x00 0x65 0x00
 
-# CHECK: neg.d $f12,$f14
-0x87 0x39 0x20 0x46
+# CHECK: neg.d $f12, $f14
+0x07 0x73 0x20 0x46
 
-# CHECK: neg.s $f6,$f7
+# CHECK: neg.s $f6, $f7
 0x87 0x39 0x00 0x46
 
-# CHECK: neg v1,a1
-0x22 0x18 0x05 0x00
-
 # CHECK: nop
 0x00 0x00 0x00 0x00
 
-# CHECK: nor t1,a2,a3
+# CHECK: nor $9,  $6, $7
 0x27 0x48 0xc7 0x00
 
-# CHECK: not v1,a1
-0x27 0x18 0xa0 0x00
-
-# CHECK: or  v1,v1,a1
+# CHECK: or  $3, $3, $5
 0x25 0x18 0x65 0x00
 
-# CHECK: ori t1,a2,0x4567
+# CHECK: ori $9,  $6, 17767
 0x67 0x45 0xc9 0x34
 
-# CHECK: rdhwr a2,$29
-0x3b 0xe8 0x06 0x7c
-
-# CHECK: round.w.d $f12,$f14
-0x8c 0x39 0x20 0x46
+# CHECK: round.w.d $f12, $f14
+0x0c 0x73 0x20 0x46
 
-# CHECK: round.w.s $f6,$f7
+# CHECK: round.w.s $f6, $f7
 0x8c 0x39 0x00 0x46
 
-# CHECK: sb  a0,9158(a1)
+# CHECK: sb  $4, 9158($5)
 0xc6 0x23 0xa4 0xa0
 
-# CHECK: sb  a0,6(a1)
+# CHECK: sb  $4, 6($5)
 0x06 0x00 0xa4 0xa0
 
-# CHECK: sc  t1,9158(a3)
+# CHECK: sc  $9, 9158($7)
 0xc6 0x23 0xe9 0xe0
 
-# CHECK: sdc1  $f9,9158(a3)
+# CHECK: sdc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xf4
 
-# CHECK: sh  a0,9158(a1)
+# CHECK: sh  $4, 9158($5)
 0xc6 0x23 0xa4 0xa4
 
-# CHECK: sll a0,v1,0x7
+# CHECK: sll $4, $3, 7
 0xc0 0x21 0x03 0x00
 
-# CHECK: sllv  v0,v1,a1
+# CHECK: sllv  $2, $3, $5
 0x04 0x10 0xa3 0x00
 
-# CHECK: slt v1,v1,a1
+# CHECK: slt $3, $3, $5
 0x2a 0x18 0x65 0x00
 
-# CHECK: slti  v1,v1,103
+# CHECK: slti  $3, $3, 103
 0x67 0x00 0x63 0x28
 
-# CHECK: sltiu v1,v1,103
+# CHECK: sltiu $3, $3, 103
 0x67 0x00 0x63 0x2c
 
-# CHECK: sltu  v1,v1,a1
+# CHECK: sltu  $3, $3, $5
 0x2b 0x18 0x65 0x00
 
-# CHECK: sqrt.d  $f12,$f14
-0x84 0x39 0x20 0x46
+# CHECK: sqrt.d  $f12, $f14
+0x04 0x73 0x20 0x46
 
-# CHECK: sqrt.s  $f6,$f7
+# CHECK: sqrt.s  $f6, $f7
 0x84 0x39 0x00 0x46
 
-# CHECK: sra a0,v1,0x7
+# CHECK: sra $4, $3, 7
 0xc3 0x21 0x03 0x00
 
-# CHECK: sra a0,v1,0x7
-0xc3 0x21 0x03 0x00
-
-# CHECK: srav  v0,v1,a1
+# CHECK: srav  $2, $3, $5
 0x07 0x10 0xa3 0x00
 
-# CHECK: srl a0,v1,0x7
+# CHECK: srl $4, $3, 7
 0xc2 0x21 0x03 0x00
 
-# CHECK: srlv  v0,v1,a1
+# CHECK: srlv  $2, $3, $5
 0x06 0x10 0xa3 0x00
 
-# CHECK: sub.d $f9,$f12,$f14
-0x41 0x32 0x27 0x46
+# CHECK: sub.d $f8, $f12, $f14
+0x01 0x62 0x2e 0x46
 
-# CHECK: sub.s $f9,$f6,$f7
+# CHECK: sub.s $f9, $f6, $f7
 0x41 0x32 0x07 0x46
 
-# CHECK: sub t1,a2,a3
+# CHECK: sub $9,  $6, $7
 0x22 0x48 0xc7 0x00
 
-# CHECK: subu  a0,v1,a1
+# CHECK: subu  $4, $3, $5
 0x23 0x20 0x65 0x00
 
-# CHECK: sw  a0,24(a1)
+# CHECK: sw  $4, 24($5)
 0x18 0x00 0xa4 0xac
 
-# CHECK: swc1  $f9,9158(a3)
+# CHECK: swc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xe4
 
-# CHECK: sync  0x7
+# CHECK: swl $4,  16($5)
+0x10 0x00 0xa4 0xa8
+
+# CHECK: swr $6, 16($7)
+0x10 0x00 0xe6 0xb8
+
+# CHECK: sync  7
 0xcf 0x01 0x00 0x00
 
-# CHECK: trunc.w.d $f12,$f14
-0x8d 0x39 0x20 0x46
+# CHECK: trunc.w.d $f12, $f14
+0x0d 0x73 0x20 0x46
 
-# CHECK: trunc.w.s $f6,$f7
+# CHECK: trunc.w.s $f6, $f7
 0x8d 0x39 0x00 0x46
 
-# CHECK: xor v1,v1,a1
+# CHECK: xor $3, $3, $5
 0x26 0x18 0x65 0x00
 
-# CHECK: xori  t1,a2,0x4567
+# CHECK: xori  $9,  $6, 17767
 0x67 0x45 0xc9 0x38
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
index 295ffd0..3b70db3 100644
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2.txt
@@ -1,439 +1,430 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x05
 
-# CHECK: abs.d $f12,$f14
-0x46 0x20 0x39 0x85
-
-# CHECK: abs.s $f6,$f7
+# CHECK: abs.s $f6, $f7
 0x46 0x00 0x39 0x85
 
-# CHECK: add t1,a2,a3
+# CHECK: add $9, $6, $7
 0x00 0xc7 0x48 0x20
 
-# CHECK: add.d $f18,$f12,$f14
-0x46 0x27 0x32 0x40
+# CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x00
 
-# CHECK: add.s $f9,$f6,$f7
+# CHECK: add.s $f9, $f6, $f7
 0x46 0x07 0x32 0x40
 
-# CHECK: addi t1,a2,17767
+# CHECK: addi $9, $6, 17767
 0x20 0xc9 0x45 0x67
 
-# CHECK: addiu t1,a2,-15001
+# CHECK: addiu $9, $6, -15001
 0x24 0xc9 0xc5 0x67
 
-# CHECK: addu t1,a2,a3
+# CHECK: addu $9, $6, $7
 0x00 0xc7 0x48 0x21
 
-# CHECK: and t1,a2,a3
+# CHECK: and $9, $6, $7
 0x00 0xc7 0x48 0x24
 
-# CHECK: andi t1,a2,0x4567
+# CHECK: andi $9, $6, 17767
 0x30 0xc9 0x45 0x67
 
-# CHECK: b 00000534
+# CHECK: b 1332
 0x10 0x00 0x01 0x4c
 
-# CHECK: bal 00000534
-0x04 0x11 0x01 0x4c
-
-# CHECK: bc1f 00000534
+# CHECK: bc1f 1332
 0x45 0x00 0x01 0x4c
 
-# CHECK: bc1t 00000534
+# CHECK: bc1t 1332
 0x45 0x01 0x01 0x4c
 
-# CHECK: beq t1,a2,00000534
+# CHECK: beq $9, $6, 1332
 0x11 0x26 0x01 0x4c
 
-# CHECK: bgez a2,00000534
+# CHECK: bgez  $6, 1332
 0x04 0xc1 0x01 0x4c
 
-# CHECK: bgezal a2,00000534
+# CHECK: bgezal  $6, 1332
 0x04 0xd1 0x01 0x4c
 
-# CHECK: bgtz a2,00000534
+# CHECK: bgtz  $6, 1332
 0x1c 0xc0 0x01 0x4c
 
-# CHECK: blez a2,00000534
+# CHECK: blez  $6, 1332
 0x18 0xc0 0x01 0x4c
 
-# CHECK: bne t1,a2,00000534
+# CHECK: bne $9, $6, 1332
 0x15 0x26 0x01 0x4c
 
-# CHECK: c.eq.d $f12,$f14
-0x46 0x27 0x30 0x32
+# CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x32
 
-# CHECK: c.eq.s $f6,$f7
+# CHECK: c.eq.s $f6, $f7
 0x46 0x07 0x30 0x32
 
-# CHECK: c.f.d $f12,$f14
-0x46 0x27 0x30 0x30
+# CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x30
 
-# CHECK: c.f.s $f6,$f7
+# CHECK: c.f.s $f6, $f7
 0x46 0x07 0x30 0x30
 
-# CHECK: c.le.d $f12,$f14
-0x46 0x27 0x30 0x3e
+# CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3e
 
-# CHECK: c.le.s $f6,$f7
+# CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3e
 
-# CHECK: c.lt.d $f12,$f14
-0x46 0x27 0x30 0x3c
+# CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3c
 
-# CHECK: c.lt.s $f6,$f7
+# CHECK: c.lt.s $f6, $f7
 0x46 0x07 0x30 0x3c
 
-# CHECK: c.nge.d $f12,$f14
-0x46 0x27 0x30 0x3d
+# CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3d
 
-# CHECK: c.nge.s $f6,$f7
+# CHECK: c.nge.s $f6, $f7
 0x46 0x07 0x30 0x3d
 
-# CHECK: c.ngl.d $f12,$f14
-0x46 0x27 0x30 0x3b
+# CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3b
 
-# CHECK: c.ngl.s $f6,$f7
+# CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3b
 
-# CHECK: c.ngle.d $f12,$f14
-0x46 0x27 0x30 0x39
+# CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x39
 
-# CHECK: c.ngle.s $f6,$f7
+# CHECK: c.ngle.s $f6, $f7
 0x46 0x07 0x30 0x39
 
-# CHECK: c.ngt.d $f12,$f14
-0x46 0x27 0x30 0x3f
+# CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x60 0x3f
 
-# CHECK: c.ngt.s $f6,$f7
+# CHECK: c.ngt.s $f6, $f7
 0x46 0x07 0x30 0x3f
 
-# CHECK: c.ole.d $f12,$f14
-0x46 0x27 0x30 0x36
+# CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x36
 
-# CHECK: c.ole.s $f6,$f7
+# CHECK: c.ole.s $f6, $f7
 0x46 0x07 0x30 0x36
 
-# CHECK: c.olt.d $f12,$f14
-0x46 0x27 0x30 0x34
+# CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x34
 
-# CHECK: c.olt.s $f6,$f7
+# CHECK: c.olt.s $f6, $f7
 0x46 0x07 0x30 0x34
 
-# CHECK: c.seq.d $f12,$f14
-0x46 0x27 0x30 0x3a
+# CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3a
 
-# CHECK: c.seq.s $f6,$f7
+# CHECK: c.seq.s $f6, $f7
 0x46 0x07 0x30 0x3a
 
-# CHECK: c.sf.d $f12,$f14
-0x46 0x27 0x30 0x38
+# CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x38
 
-# CHECK: c.sf.s $f6,$f7
+# CHECK: c.sf.s $f6, $f7
 0x46 0x07 0x30 0x38
 
-# CHECK: c.ueq.d $f12,$f14
-0x46 0x27 0x30 0x33
+# CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x33
 
-# CHECK: c.ueq.s $f28,$f18
+# CHECK: c.ueq.s $f28, $f18
 0x46 0x12 0xe0 0x33
 
-# CHECK: c.ule.d $f12,$f14
-0x46 0x27 0x30 0x37
+# CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x37
 
-# CHECK: c.ule.s $f6,$f7
+# CHECK: c.ule.s $f6, $f7
 0x46 0x07 0x30 0x37
 
-# CHECK: c.ult.d $f12,$f14
-0x46 0x27 0x30 0x35
+# CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x35
 
-# CHECK: c.ult.s $f6,$f7
+# CHECK: c.ult.s $f6, $f7
 0x46 0x07 0x30 0x35
 
-# CHECK: c.un.d $f12,$f14
-0x46 0x27 0x30 0x31
+# CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x31
 
-# CHECK: c.un.s $f6,$f7
+# CHECK: c.un.s $f6, $f7
 0x46 0x07 0x30 0x31
 
-# CHECK: ceil.w.d $f12,$f14
-0x46 0x20 0x39 0x8e
+# CHECK: ceil.w.d $f12, $f14
+0x46 0x20 0x73 0x0e
 
-# CHECK: ceil.w.s $f6,$f7
+# CHECK: ceil.w.s $f6, $f7
 0x46 0x00 0x39 0x8e
 
-# CHECK: cfc1 a2,$7
+# CHECK: cfc1  $6, $7
 0x44 0x46 0x38 0x00
 
-# CHECK: clo a2,a3
+# CHECK: clo  $6, $7
 0x70 0xe6 0x30 0x21
 
-# CHECK: clz a2,a3
+# CHECK: clz  $6, $7
 0x70 0xe6 0x30 0x20
 
-# CHECK: ctc1 a2,$7
+# CHECK: ctc1  $6, $7
 0x44 0xc6 0x38 0x00
 
-# CHECK: cvt.d.s $f6,$f7
-0x46 0x00 0x38 0xa1
+# CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa1
 
-# CHECK: cvt.d.w $f12,$f14
-0x46 0x80 0x38 0xa1
+# CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x73 0x21
 
-# CHECK: cvt.l.d $f12,$f14
-0x46 0x20 0x39 0xa5
+# CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x73 0x25
 
-# CHECK: cvt.l.s $f6,$f7
+# CHECK: cvt.l.s $f6, $f7
 0x46 0x00 0x39 0xa5
 
-# CHECK: cvt.s.d $f12,$f14
-0x46 0x20 0x39 0xa0
+# CHECK: cvt.s.d $f12, $f14
+0x46 0x20 0x73 0x20
 
-# CHECK: cvt.s.w $f6,$f7
+# CHECK: cvt.s.w $f6, $f7
 0x46 0x80 0x39 0xa0
 
-# CHECK: cvt.w.d $f12,$f14
-0x46 0x20 0x39 0xa4
+# CHECK: cvt.w.d $f12, $f14
+0x46 0x20 0x73 0x24
 
-# CHECK: cvt.w.s $f6,$f7
+# CHECK: cvt.w.s $f6, $f7
 0x46 0x00 0x39 0xa4
 
-# CHECK: floor.w.d $f12,$f14
-0x46 0x20 0x39 0x8f
+# CHECK: floor.w.d $f12, $f14
+0x46 0x20 0x73 0x0f
 
-# CHECK: floor.w.s $f6,$f7
+# CHECK: floor.w.s $f6, $f7
 0x46 0x00 0x39 0x8f
 
-# CHECK: ins s3,t1,0x6,0x7
+# CHECK: ins $19, $9, 6, 7
 0x7d 0x33 0x61 0x84
 
-# CHECK: j 00000530
+# CHECK: j 1328
 0x08 0x00 0x01 0x4c
 
-# CHECK: jal 00000530
+# CHECK: jal 1328
 0x0c 0x00 0x01 0x4c
 
-# CHECK: jalr a2,a3
+# CHECK: jalr  $7
 0x00 0xe0 0xf8 0x09
 
-# CHECK: jr a3
+# CHECK: jr  $7
 0x00 0xe0 0x00 0x08
 
-# CHECK: lb  a0,9158(a1)
+# CHECK: lb  $4, 9158($5)
 0x80 0xa4 0x23 0xc6
 
-# CHECK: lbu a0,6(a1)
+# CHECK: lbu $4, 6($5)
 0x90 0xa4 0x00 0x06
 
-# CHECK: ldc1  $f9,9158(a3)
+# CHECK: ldc1  $f9, 9158($7)
 0xd4 0xe9 0x23 0xc6
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x84 0xa4 0x00 0x0c
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x84 0xa4 0x00 0x0c
 
-# CHECK: li  v1,17767
-0x24 0x03 0x45 0x67
-
-# CHECK: ll  t1,9158(a3)
+# CHECK: ll  $9, 9158($7)
 0xc0 0xe9 0x23 0xc6
 
-# CHECK: lui a2,0x4567
+# CHECK: lui  $6, 17767
 0x3c 0x06 0x45 0x67
 
-# CHECK: lw  a0,24(a1)
+# CHECK: lw  $4, 24($5)
 0x8c 0xa4 0x00 0x18
 
-# CHECK: lwc1  $f9,9158(a3)
+# CHECK: lwc1  $f9, 9158($7)
 0xc4 0xe9 0x23 0xc6
 
-# CHECK: madd  a2,a3
+# CHECK: lwl   $2,  3($4)
+0x88 0x82 0x00 0x03
+
+# CHECK: lwr   $3, 16($5)
+0x98 0xa3 0x00 0x10
+
+# CHECK: madd   $6,  $7
 0x70 0xc7 0x00 0x00
 
-# CHECK: maddu a2,a3
+# CHECK: maddu  $6,  $7
 0x70 0xc7 0x00 0x01
 
-# CHECK: mfc1  a2,$f7
+# CHECK: mfc1   $6, $f7
 0x44 0x06 0x38 0x00
 
-# CHECK: mfhi  a1
+# CHECK: mfhi  $5
 0x00 0x00 0x28 0x10
 
-# CHECK: mflo  a1
+# CHECK: mflo  $5
 0x00 0x00 0x28 0x12
 
-# CHECK: mov.d $f6,$f7
-0x46 0x20 0x39 0x86
+# CHECK: mov.d $f6, $f8
+0x46 0x20 0x41 0x86
 
-# CHECK: mov.s $f6,$f7
+# CHECK: mov.s $f6, $f7
 0x46 0x00 0x39 0x86
 
-# CHECK: move  a2,a1
-0x00 0xa0 0x30 0x21
-
-# CHECK: msub  a2,a3
+# CHECK: msub   $6,  $7
 0x70 0xc7 0x00 0x04
 
-# CHECK: msubu a2,a3
+# CHECK: msubu  $6,  $7
 0x70 0xc7 0x00 0x05
 
-# CHECK: mtc1  a2,$f7
+# CHECK: mtc1   $6, $f7
 0x44 0x86 0x38 0x00
 
-# CHECK: mthi  a3
+# CHECK: mthi   $7
 0x00 0xe0 0x00 0x11
 
-# CHECK: mtlo  a3
+# CHECK: mtlo   $7
 0x00 0xe0 0x00 0x13
 
-# CHECK: mul.d $f9,$f12,$f14
-0x46 0x27 0x32 0x42
+# CHECK: mul.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02
 
-# CHECK: mul.s $f9,$f6,$f7
+# CHECK: mul.s $f9, $f6, $f7
 0x46 0x07 0x32 0x42
 
-# CHECK: mul t1,a2,a3
+# CHECK: mul $9,  $6,  $7
 0x70 0xc7 0x48 0x02
 
-# CHECK: mult  v1,a1
+# CHECK: mult  $3, $5
 0x00 0x65 0x00 0x18
 
-# CHECK: multu v1,a1
+# CHECK: multu $3, $5
 0x00 0x65 0x00 0x19
 
-# CHECK: neg.d $f12,$f14
-0x46 0x20 0x39 0x87
+# CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x07
 
-# CHECK: neg.s $f6,$f7
+# CHECK: neg.s $f6, $f7
 0x46 0x00 0x39 0x87
 
-# CHECK: neg v1,a1
-0x00 0x05 0x18 0x22
-
 # CHECK: nop
 0x00 0x00 0x00 0x00
 
-# CHECK: nor t1,a2,a3
+# CHECK: nor $9,  $6, $7
 0x00 0xc7 0x48 0x27
 
-# CHECK: not v1,a1
-0x00 0xa0 0x18 0x27
-
-# CHECK: or  v1,v1,a1
+# CHECK: or  $3, $3, $5
 0x00 0x65 0x18 0x25
 
-# CHECK: ori t1,a2,0x4567
+# CHECK: ori $9,  $6, 17767
 0x34 0xc9 0x45 0x67
 
-# CHECK: rdhwr a2,$29
-0x7c 0x06 0xe8 0x3b
-
-# CHECK: ror t1,a2,0x7
+# CHECK: rotr $9, $6, 7
 0x00 0x26 0x49 0xc2
 
-# CHECK: rorv  t1,a2,a3
+# CHECK:  rotrv $9, $6, $7
 0x00 0xe6 0x48 0x46
 
-# CHECK: round.w.d $f12,$f14
-0x46 0x20 0x39 0x8c
+# CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0c
 
-# CHECK: round.w.s $f6,$f7
+# CHECK: round.w.s $f6, $f7
 0x46 0x00 0x39 0x8c
 
-# CHECK: sb  a0,9158(a1)
+# CHECK: sb  $4, 9158($5)
 0xa0 0xa4 0x23 0xc6
 
-# CHECK: sb  a0,6(a1)
+# CHECK: sb  $4, 6($5)
 0xa0 0xa4 0x00 0x06
 
-# CHECK: sc  t1,9158(a3)
+# CHECK: sc  $9, 9158($7)
 0xe0 0xe9 0x23 0xc6
 
-# CHECK: sdc1  $f9,9158(a3)
+# CHECK: sdc1  $f9, 9158($7)
 0xf4 0xe9 0x23 0xc6
 
-# CHECK: seb a2,a3
+# CHECK: seb $6, $7
 0x7c 0x07 0x34 0x20
 
-# CHECK: seh a2,a3
+# CHECK: seh $6, $7
 0x7c 0x07 0x36 0x20
 
-# CHECK: sh  a0,9158(a1)
+# CHECK: sh  $4, 9158($5)
 0xa4 0xa4 0x23 0xc6
 
-# CHECK: sll a0,v1,0x7
+# CHECK: sll $4, $3, 7
 0x00 0x03 0x21 0xc0
 
-# CHECK: sllv  v0,v1,a1
+# CHECK: sllv  $2, $3, $5
 0x00 0xa3 0x10 0x04
 
-# CHECK: slt v1,v1,a1
+# CHECK: slt $3, $3, $5
 0x00 0x65 0x18 0x2a
 
-# CHECK: slti  v1,v1,103
+# CHECK: slti  $3, $3, 103
 0x28 0x63 0x00 0x67
 
-# CHECK: sltiu v1,v1,103
+# CHECK: sltiu $3, $3, 103
 0x2c 0x63 0x00 0x67
 
-# CHECK: sltu  v1,v1,a1
+# CHECK: sltu  $3, $3, $5
 0x00 0x65 0x18 0x2b
 
-# CHECK: sqrt.d  $f12,$f14
-0x46 0x20 0x39 0x84
+# CHECK: sqrt.d  $f12, $f14
+0x46 0x20 0x73 0x04
 
-# CHECK: sqrt.s  $f6,$f7
+# CHECK: sqrt.s  $f6, $f7
 0x46 0x00 0x39 0x84
 
-# CHECK: sra a0,v1,0x7
-0x00 0x03 0x21 0xc3
-
-# CHECK: sra a0,v1,0x7
+# CHECK: sra $4, $3, 7
 0x00 0x03 0x21 0xc3
 
-# CHECK: srav  v0,v1,a1
+# CHECK: srav  $2, $3, $5
 0x00 0xa3 0x10 0x07
 
-# CHECK: srl a0,v1,0x7
+# CHECK: srl $4, $3, 7
 0x00 0x03 0x21 0xc2
 
-# CHECK: srlv  v0,v1,a1
+# CHECK: srlv  $2, $3, $5
 0x00 0xa3 0x10 0x06
 
-# CHECK: sub.d $f9,$f12,$f14
-0x46 0x27 0x32 0x41
+# CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01
 
-# CHECK: sub.s $f9,$f6,$f7
+# CHECK: sub.s $f9, $f6, $f7
 0x46 0x07 0x32 0x41
 
-# CHECK: sub t1,a2,a3
+# CHECK: sub $9,  $6, $7
 0x00 0xc7 0x48 0x22
 
-# CHECK: subu  a0,v1,a1
+# CHECK: subu  $4, $3, $5
 0x00 0x65 0x20 0x23
 
-# CHECK: sw  a0,24(a1)
+# CHECK: sw  $4, 24($5)
 0xac 0xa4 0x00 0x18
 
-# CHECK: swc1  $f9,9158(a3)
+# CHECK: swc1  $f9, 9158($7)
 0xe4 0xe9 0x23 0xc6
 
-# CHECK: sync  0x7
+# CHECK: swl $4,  16($5)
+0xa8 0xa4 0x00 0x10
+
+# CHECK: swr $6, 16($7)
+0xb8 0xe6 0x00 0x10
+
+# CHECK: sync  7
 0x00 0x00 0x01 0xcf
 
-# CHECK: trunc.w.d $f12,$f14
-0x46 0x20 0x39 0x8d
+# CHECK: trunc.w.d $f12, $f14
+0x46 0x20 0x73 0x0d
 
-# CHECK: trunc.w.s $f6,$f7
+# CHECK: trunc.w.s $f6, $f7
 0x46 0x00 0x39 0x8d
 
-# CHECK: wsbh  a2,a3
+# CHECK: wsbh  $6, $7
 0x7c 0x07 0x30 0xa0
 
-# CHECK: xor v1,v1,a1
+# CHECK: xor $3, $3, $5
 0x00 0x65 0x18 0x26
 
-# CHECK: xori  t1,a2,0x4567
+# CHECK: xori  $9,  $6, 17767
 0x38 0xc9 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index 6d8be79..ecfde7a 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -1,442 +1,430 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: abs.d $f12, $f14
+0x05 0x73 0x20 0x46
 
-# CHECK: abs.d $f12,$f14
-0x85 0x39 0x20 0x46
-
-# CHECK: abs.s $f6,$f7
+# CHECK: abs.s $f6, $f7
 0x85 0x39 0x00 0x46
 
-# CHECK: add t1,a2,a3
+# CHECK: add $9, $6, $7
 0x20 0x48 0xc7 0x00
 
-# CHECK: add.d $f18,$f12,$f14
-0x40 0x32 0x27 0x46
+# CHECK: add.d $f8, $f12, $f14
+0x00 0x62 0x2e 0x46
 
-# CHECK: add.s $f9,$f6,$f7
+# CHECK: add.s $f9, $f6, $f7
 0x40 0x32 0x07 0x46
 
-# CHECK: addi t1,a2,17767
+# CHECK: addi $9, $6, 17767
 0x67 0x45 0xc9 0x20
 
-# CHECK: addiu t1,a2,-15001
+# CHECK: addiu $9, $6, -15001
 0x67 0xc5 0xc9 0x24
 
-# CHECK: addu t1,a2,a3
+# CHECK: addu $9, $6, $7
 0x21 0x48 0xc7 0x00
 
-# CHECK: and t1,a2,a3
+# CHECK: and $9, $6, $7
 0x24 0x48 0xc7 0x00
 
-# CHECK: andi t1,a2,0x4567
+# CHECK: andi $9, $6, 17767
 0x67 0x45 0xc9 0x30
 
-# CHECK: b 00000534
+# CHECK: b 1332
 0x4c 0x01 0x00 0x10
 
-# CHECK: bal 00000534
-0x4c 0x01 0x11 0x04
-
-# CHECK: bc1f 00000534
+# CHECK: bc1f 1332
 0x4c 0x01 0x00 0x45
 
-# CHECK: bc1t 00000534
+# CHECK: bc1t 1332
 0x4c 0x01 0x01 0x45
 
-# CHECK: beq t1,a2,00000534
+# CHECK: beq $9, $6, 1332
 0x4c 0x01 0x26 0x11
 
-# CHECK: bgez a2,00000534
+# CHECK: bgez  $6, 1332
 0x4c 0x01 0xc1 0x04
 
-# CHECK: bgezal a2,00000534
+# CHECK: bgezal  $6, 1332
 0x4c 0x01 0xd1 0x04
 
-# CHECK: bgtz a2,00000534
+# CHECK: bgtz  $6, 1332
 0x4c 0x01 0xc0 0x1c
 
-# CHECK: blez a2,00000534
+# CHECK: blez  $6, 1332
 0x4c 0x01 0xc0 0x18
 
-# CHECK: bne t1,a2,00000534
+# CHECK: bne $9, $6, 1332
 0x4c 0x01 0x26 0x15
 
-# CHECK: c.eq.d $f12,$f14
-0x32 0x30 0x27 0x46
+# CHECK: c.eq.d $f12, $f14
+0x32 0x60 0x2e 0x46
 
-# CHECK: c.eq.s $f6,$f7
+# CHECK: c.eq.s $f6, $f7
 0x32 0x30 0x07 0x46
 
-# CHECK: c.f.d $f12,$f14
-0x30 0x30 0x27 0x46
+# CHECK: c.f.d $f12, $f14
+0x30 0x60 0x2e 0x46
 
-# CHECK: c.f.s $f6,$f7
+# CHECK: c.f.s $f6, $f7
 0x30 0x30 0x07 0x46
 
-# CHECK: c.le.d $f12,$f14
-0x3e 0x30 0x27 0x46
+# CHECK: c.le.d $f12, $f14
+0x3e 0x60 0x2e 0x46
 
-# CHECK: c.le.s $f6,$f7
+# CHECK: c.le.s $f6, $f7
 0x3e 0x30 0x07 0x46
 
-# CHECK: c.lt.d $f12,$f14
-0x3c 0x30 0x27 0x46
+# CHECK: c.lt.d $f12, $f14
+0x3c 0x60 0x2e 0x46
 
-# CHECK: c.lt.s $f6,$f7
+# CHECK: c.lt.s $f6, $f7
 0x3c 0x30 0x07 0x46
 
-# CHECK: c.nge.d $f12,$f14
-0x3d 0x30 0x27 0x46
+# CHECK: c.nge.d $f12, $f14
+0x3d 0x60 0x2e 0x46
 
-# CHECK: c.nge.s $f6,$f7
+# CHECK: c.nge.s $f6, $f7
 0x3d 0x30 0x07 0x46
 
-# CHECK: c.ngl.d $f12,$f14
-0x3b 0x30 0x27 0x46
+# CHECK: c.ngl.d $f12, $f14
+0x3b 0x60 0x2e 0x46
 
-# CHECK: c.ngl.s $f6,$f7
+# CHECK: c.ngl.s $f6, $f7
 0x3b 0x30 0x07 0x46
 
-# CHECK: c.ngle.d $f12,$f14
-0x39 0x30 0x27 0x46
+# CHECK: c.ngle.d $f12, $f14
+0x39 0x60 0x2e 0x46
 
-# CHECK: c.ngle.s $f6,$f7
+# CHECK: c.ngle.s $f6, $f7
 0x39 0x30 0x07 0x46
 
-# CHECK: c.ngt.d $f12,$f14
-0x3f 0x30 0x27 0x46
+# CHECK: c.ngt.d $f12, $f14
+0x3f 0x60 0x2e 0x46
 
-# CHECK: c.ngt.s $f6,$f7
+# CHECK: c.ngt.s $f6, $f7
 0x3f 0x30 0x07 0x46
 
-# CHECK: c.ole.d $f12,$f14
-0x36 0x30 0x27 0x46
+# CHECK: c.ole.d $f12, $f14
+0x36 0x60 0x2e 0x46
 
-# CHECK: c.ole.s $f6,$f7
+# CHECK: c.ole.s $f6, $f7
 0x36 0x30 0x07 0x46
 
-# CHECK: c.olt.d $f12,$f14
-0x34 0x30 0x27 0x46
+# CHECK: c.olt.d $f12, $f14
+0x34 0x60 0x2e 0x46
 
-# CHECK: c.olt.s $f6,$f7
+# CHECK: c.olt.s $f6, $f7
 0x34 0x30 0x07 0x46
 
-# CHECK: c.seq.d $f12,$f14
-0x3a 0x30 0x27 0x46
+# CHECK: c.seq.d $f12, $f14
+0x3a 0x60 0x2e 0x46
 
-# CHECK: c.seq.s $f6,$f7
+# CHECK: c.seq.s $f6, $f7
 0x3a 0x30 0x07 0x46
 
-# CHECK: c.sf.d $f12,$f14
-0x38 0x30 0x27 0x46
+# CHECK: c.sf.d $f12, $f14
+0x38 0x60 0x2e 0x46
 
-# CHECK: c.sf.s $f6,$f7
+# CHECK: c.sf.s $f6, $f7
 0x38 0x30 0x07 0x46
 
-# CHECK: c.ueq.d $f12,$f14
-0x33 0x30 0x27 0x46
+# CHECK: c.ueq.d $f12, $f14
+0x33 0x60 0x2e 0x46
 
-# CHECK: c.ueq.s $f28,$f18
+# CHECK: c.ueq.s $f28, $f18
 0x33 0xe0 0x12 0x46
 
-# CHECK: c.ule.d $f12,$f14
-0x37 0x30 0x27 0x46
+# CHECK: c.ule.d $f12, $f14
+0x37 0x60 0x2e 0x46
 
-# CHECK: c.ule.s $f6,$f7
+# CHECK: c.ule.s $f6, $f7
 0x37 0x30 0x07 0x46
 
-# CHECK: c.ult.d $f12,$f14
-0x35 0x30 0x27 0x46
+# CHECK: c.ult.d $f12, $f14
+0x35 0x60 0x2e 0x46
 
-# CHECK: c.ult.s $f6,$f7
+# CHECK: c.ult.s $f6, $f7
 0x35 0x30 0x07 0x46
 
-# CHECK: c.un.d $f12,$f14
-0x31 0x30 0x27 0x46
+# CHECK: c.un.d $f12, $f14
+0x31 0x60 0x2e 0x46
 
-# CHECK: c.un.s $f6,$f7
+# CHECK: c.un.s $f6, $f7
 0x31 0x30 0x07 0x46
 
-# CHECK: ceil.w.d $f12,$f14
-0x8e 0x38 0x20 0x46
+# CHECK: ceil.w.d $f12, $f14
+0x0e 0x73 0x20 0x46
 
-# CHECK: ceil.w.s $f6,$f7
-0x8e 0x38 0x00 0x46
+# CHECK: ceil.w.s $f6, $f7
+0x8e 0x39 0x00 0x46
 
-# CHECK: cfc1 a2,$7
+# CHECK: cfc1  $6, $7
 0x00 0x38 0x46 0x44
 
-# CHECK: clo a2,a3
+# CHECK: clo  $6, $7
 0x21 0x30 0xe6 0x70
 
-# CHECK: clz a2,a3
+# CHECK: clz  $6, $7
 0x20 0x30 0xe6 0x70
 
-# CHECK: ctc1 a2,$7
+# CHECK: ctc1  $6, $7
 0x00 0x38 0xc6 0x44
 
-# CHECK: cvt.d.s $f6,$f7
+# CHECK: cvt.d.s $f6, $f7
 0xa1 0x39 0x00 0x46
 
-# CHECK: cvt.d.w $f12,$f14
-0xa1 0x39 0x80 0x46
+# CHECK: cvt.d.w $f12, $f14
+0x21 0x73 0x80 0x46
 
-# CHECK: cvt.l.d $f12,$f14
-0xa5 0x39 0x20 0x46
+# CHECK: cvt.l.d $f12, $f14
+0x25 0x73 0x20 0x46
 
-# CHECK: cvt.l.s $f6,$f7
+# CHECK: cvt.l.s $f6, $f7
 0xa5 0x39 0x00 0x46
 
-# CHECK: cvt.s.d $f12,$f14
-0xa0 0x39 0x20 0x46
+# CHECK: cvt.s.d $f12, $f14
+0x20 0x73 0x20 0x46
 
-# CHECK: cvt.s.w $f6,$f7
+# CHECK: cvt.s.w $f6, $f7
 0xa0 0x39 0x80 0x46
 
-# CHECK: cvt.w.d $f12,$f14
-0xa4 0x39 0x20 0x46
+# CHECK: cvt.w.d $f12, $f14
+0x24 0x73 0x20 0x46
 
-# CHECK: cvt.w.s $f6,$f7
+# CHECK: cvt.w.s $f6, $f7
 0xa4 0x39 0x00 0x46
 
-# CHECK: floor.w.d $f12,$f14
-0x8f 0x39 0x20 0x46
+# CHECK: floor.w.d $f12, $f14
+0x0f 0x73 0x20 0x46
 
-# CHECK: floor.w.s $f6,$f7
+# CHECK: floor.w.s $f6, $f7
 0x8f 0x39 0x00 0x46
 
-# CHECK: ins s3,t1,0x6,0x7
+# CHECK: ins $19, $9, 6, 7
 0x84 0x61 0x33 0x7d
 
-# CHECK: j 00000530
+# CHECK: j 1328
 0x4c 0x01 0x00 0x08
 
-# CHECK: jal 00000530
+# CHECK: jal 1328
 0x4c 0x01 0x00 0x0c
 
-# CHECK: jalr a2,a3
+# CHECK: jalr  $7
 0x09 0xf8 0xe0 0x00
 
-# CHECK: jr a3
+# CHECK: jr  $7
 0x08 0x00 0xe0 0x00
 
-# CHECK: lb  a0,9158(a1)
+# CHECK: lb  $4, 9158($5)
 0xc6 0x23 0xa4 0x80
 
-# CHECK: lbu a0,6(a1)
+# CHECK: lbu $4, 6($5)
 0x06 0x00 0xa4 0x90
 
-# CHECK: ldc1  $f9,9158(a3)
+# CHECK: ldc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xd4
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x0c 0x00 0xa4 0x84
 
-# CHECK: lh  a0,12(a1)
+# CHECK: lh  $4, 12($5)
 0x0c 0x00 0xa4 0x84
 
-# CHECK: li  v1,17767
-0x67 0x45 0x03 0x24
-
-# CHECK: ll  t1,9158(a3)
+# CHECK: ll  $9, 9158($7)
 0xc6 0x23 0xe9 0xc0
 
-# CHECK: lui a2,0x4567
+# CHECK: lui  $6, 17767
 0x67 0x45 0x06 0x3c
 
-# CHECK: lw  a0,24(a1)
+# CHECK: lw  $4, 24($5)
 0x18 0x00 0xa4 0x8c
 
-# CHECK lw at,-18316(v0)
-0x74 0xb8 0x41 0x8c
-
-# CHECK: lwc1  $f9,9158(a3)
+# CHECK: lwc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xc4
 
-# CHECK: madd  a2,a3
+# CHECK: lwl   $2,  3($4)
+0x03 0x00 0x82 0x88
+
+# CHECK: lwr   $3, 16($5)
+0x10 0x00 0xa3 0x98
+
+# CHECK: madd   $6,  $7
 0x00 0x00 0xc7 0x70
 
-# CHECK: maddu a2,a3
+# CHECK: maddu  $6,  $7
 0x01 0x00 0xc7 0x70
 
-# CHECK: mfc1  a2,$f7
+# CHECK: mfc1   $6, $f7
 0x00 0x38 0x06 0x44
 
-# CHECK: mfhi  a1
+# CHECK: mfhi  $5
 0x10 0x28 0x00 0x00
 
-# CHECK: mflo  a1
+# CHECK: mflo  $5
 0x12 0x28 0x00 0x00
 
-# CHECK: mov.d $f12,$f14
-0x86 0x39 0x20 0x46
+# CHECK: mov.d $f6, $f8
+0x86 0x41 0x20 0x46
 
-# CHECK: mov.s $f6,$f7
+# CHECK: mov.s $f6, $f7
 0x86 0x39 0x00 0x46
 
-# CHECK: move  a2,a1
-0x21 0x30 0xa0 0x00
-
-# CHECK: msub  a2,a3
+# CHECK: msub   $6,  $7
 0x04 0x00 0xc7 0x70
 
-# CHECK: msubu a2,a3
+# CHECK: msubu  $6,  $7
 0x05 0x00 0xc7 0x70
 
-# CHECK: mtc1  a2,$f7
+# CHECK: mtc1   $6, $f7
 0x00 0x38 0x86 0x44
 
-# CHECK: mthi  a3
+# CHECK: mthi   $7
 0x11 0x00 0xe0 0x00
 
-# CHECK: mtlo  a3
+# CHECK: mtlo   $7
 0x13 0x00 0xe0 0x00
 
-# CHECK: mul.d $f9,$f12,$f14
-0x42 0x32 0x27 0x46
+# CHECK: mul.d $f8, $f12, $f14
+0x02 0x62 0x2e 0x46
 
-# CHECK: mul.s $f9,$f6,$f7
+# CHECK: mul.s $f9, $f6, $f7
 0x42 0x32 0x07 0x46
 
-# CHECK: mul t1,a2,a3
+# CHECK: mul $9,  $6,  $7
 0x02 0x48 0xc7 0x70
 
-# CHECK: mult  v1,a1
+# CHECK: mult  $3, $5
 0x18 0x00 0x65 0x00
 
-# CHECK: multu v1,a1
+# CHECK: multu $3, $5
 0x19 0x00 0x65 0x00
 
-# CHECK: neg.d $f12,$f14
-0x87 0x39 0x20 0x46
+# CHECK: neg.d $f12, $f14
+0x07 0x73 0x20 0x46
 
-# CHECK: neg.s $f6,$f7
+# CHECK: neg.s $f6, $f7
 0x87 0x39 0x00 0x46
 
-# CHECK: neg v1,a1
-0x22 0x18 0x05 0x00
-
 # CHECK: nop
 0x00 0x00 0x00 0x00
 
-# CHECK: nor t1,a2,a3
+# CHECK: nor $9,  $6, $7
 0x27 0x48 0xc7 0x00
 
-# CHECK: not v1,a1
-0x27 0x18 0xa0 0x00
-
-# CHECK: or  v1,v1,a1
+# CHECK: or  $3, $3, $5
 0x25 0x18 0x65 0x00
 
-# CHECK: ori t1,a2,0x4567
+# CHECK: ori $9,  $6, 17767
 0x67 0x45 0xc9 0x34
 
-# CHECK: rdhwr a2,$29
-0x3b 0xe8 0x06 0x7c
-
-# CHECK: ror t1,a2,0x7
+# CHECK: rotr $9, $6, 7
 0xc2 0x49 0x26 0x00
 
-# CHECK: rorv  t1,a2,a3
+# CHECK:  rotrv $9, $6, $7
 0x46 0x48 0xe6 0x00
 
-# CHECK: round.w.d $f12,$f14
-0x8c 0x39 0x20 0x46
+# CHECK: round.w.d $f12, $f14
+0x0c 0x73 0x20 0x46
 
-# CHECK: round.w.s $f6,$f7
+# CHECK: round.w.s $f6, $f7
 0x8c 0x39 0x00 0x46
 
-# CHECK: sb  a0,9158(a1)
+# CHECK: sb  $4, 9158($5)
 0xc6 0x23 0xa4 0xa0
 
-# CHECK: sb  a0,6(a1)
+# CHECK: sb  $4, 6($5)
 0x06 0x00 0xa4 0xa0
 
-# CHECK: sc  t1,9158(a3)
+# CHECK: sc  $9, 9158($7)
 0xc6 0x23 0xe9 0xe0
 
-# CHECK: sdc1  $f9,9158(a3)
+# CHECK: sdc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xf4
 
-# CHECK: seb a2,a3
+# CHECK: seb $6, $7
 0x20 0x34 0x07 0x7c
 
-# CHECK: seh a2,a3
+# CHECK: seh $6, $7
 0x20 0x36 0x07 0x7c
 
-# CHECK: sh  a0,9158(a1)
+# CHECK: sh  $4, 9158($5)
 0xc6 0x23 0xa4 0xa4
 
-# CHECK: sll a0,v1,0x7
+# CHECK: sll $4, $3, 7
 0xc0 0x21 0x03 0x00
 
-# CHECK: sllv  v0,v1,a1
+# CHECK: sllv  $2, $3, $5
 0x04 0x10 0xa3 0x00
 
-# CHECK: slt v1,v1,a1
+# CHECK: slt $3, $3, $5
 0x2a 0x18 0x65 0x00
 
-# CHECK: slti  v1,v1,103
+# CHECK: slti  $3, $3, 103
 0x67 0x00 0x63 0x28
 
-# CHECK: sltiu v1,v1,103
+# CHECK: sltiu $3, $3, 103
 0x67 0x00 0x63 0x2c
 
-# CHECK: sltu  v1,v1,a1
+# CHECK: sltu  $3, $3, $5
 0x2b 0x18 0x65 0x00
 
-# CHECK: sqrt.d  $f12,$f14
-0x84 0x39 0x20 0x46
+# CHECK: sqrt.d  $f12, $f14
+0x04 0x73 0x20 0x46
 
-# CHECK: sqrt.s  $f6,$f7
+# CHECK: sqrt.s  $f6, $f7
 0x84 0x39 0x00 0x46
 
-# CHECK: sra a0,v1,0x7
+# CHECK: sra $4, $3, 7
 0xc3 0x21 0x03 0x00
 
-# CHECK: sra a0,v1,0x7
-0xc3 0x21 0x03 0x00
-
-# CHECK: srav  v0,v1,a1
+# CHECK: srav  $2, $3, $5
 0x07 0x10 0xa3 0x00
 
-# CHECK: srl a0,v1,0x7
+# CHECK: srl $4, $3, 7
 0xc2 0x21 0x03 0x00
 
-# CHECK: srlv  v0,v1,a1
+# CHECK: srlv  $2, $3, $5
 0x06 0x10 0xa3 0x00
 
-# CHECK: sub.d $f9,$f12,$f14
-0x41 0x32 0x27 0x46
+# CHECK: sub.d $f8, $f12, $f14
+0x01 0x62 0x2e 0x46
 
-# CHECK: sub.s $f9,$f6,$f7
+# CHECK: sub.s $f9, $f6, $f7
 0x41 0x32 0x07 0x46
 
-# CHECK: sub t1,a2,a3
+# CHECK: sub $9,  $6, $7
 0x22 0x48 0xc7 0x00
 
-# CHECK: subu  a0,v1,a1
+# CHECK: subu  $4, $3, $5
 0x23 0x20 0x65 0x00
 
-# CHECK: sw  a0,24(a1)
+# CHECK: sw  $4, 24($5)
 0x18 0x00 0xa4 0xac
 
-# CHECK: swc1  $f9,9158(a3)
+# CHECK: swc1  $f9, 9158($7)
 0xc6 0x23 0xe9 0xe4
 
-# CHECK: sync  0x7
+# CHECK: swl $4,  16($5)
+0x10 0x00 0xa4 0xa8
+
+# CHECK: swr $6, 16($7)
+0x10 0x00 0xe6 0xb8
+
+# CHECK: sync  7
 0xcf 0x01 0x00 0x00
 
-# CHECK: trunc.w.d $f12,$f14
-0x8d 0x39 0x20 0x46
+# CHECK: trunc.w.d $f12, $f14
+0x0d 0x73 0x20 0x46
 
-# CHECK: trunc.w.s $f6,$f7
+# CHECK: trunc.w.s $f6, $f7
 0x8d 0x39 0x00 0x46
 
-# CHECK: wsbh  a2,a3
+# CHECK: wsbh  $6, $7
 0xa0 0x30 0x07 0x7c
 
-# CHECK: xor v1,v1,a1
+# CHECK: xor $3, $3, $5
 0x26 0x18 0x65 0x00
 
-# CHECK: xori  t1,a2,0x4567
+# CHECK: xori  $9,  $6, 17767
 0x67 0x45 0xc9 0x38
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 1c7447a..095ed18 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux
-
-# CHECK: daddiu t3,k0,31949
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
-# CHECK: daddu k0,at,t3
+# CHECK: daddu $26, $at, $11
 0x00 0x2b 0xd0 0x2d
 
-# CHECK: ddiv zero,k0,s6
+# CHECK: ddiv $zero, $26, $22
 0x03 0x56 0x00 0x1e
 
-# CHECK: ddivu zero,t1,t8
+# CHECK: ddivu $zero, $9, $24
 0x01 0x38 0x00 0x1f
 
-# CHECK: dmfc1 v0,$f14
+# CHECK: dmfc1 $2, $f14
 0x44 0x22 0x70 0x00
 
-# CHECK: dmtc1 s7,$f5
+# CHECK: dmtc1 $23, $f5
 0x44 0xb7 0x28 0x00
 
-# CHECK: dmult t3,k0
+# CHECK: dmult $11, $26
 0x01 0x7a 0x00 0x1c
 
-# CHECK: dmultu s7,t5
+# CHECK: dmultu $23, $13
 0x02 0xed 0x00 0x1d
 
-# CHECK: dsll v1,t8,0x11
+# CHECK: dsll $3, $24, 17
 0x00 0x18 0x1c 0x78
 
-# CHECK: dsllv gp,k1,t8
+# CHECK: dsllv $gp, $27, $24
 0x03 0x1b 0xe0 0x14
 
-# CHECK: dsra at,at,0x1e
+# CHECK: dsra $at, $at, 30
 0x00 0x01 0x0f 0xbb
 
-# CHECK: dsrav at,at,s8
+# CHECK: dsrav $at, $at, $fp
 0x03 0xc1 0x08 0x17
 
-# CHECK: dsrl t2,gp,0x18
+# CHECK: dsrl $10, $gp, 24
 0x00 0x1c 0x56 0x3a
 
-# CHECK: dsrlv gp,t2,s7
+# CHECK: dsrlv $gp, $10, $23
 0x02 0xea 0xe0 0x16
 
-# CHECK: dsubu gp,k1,t8
+# CHECK: dsubu $gp, $27, $24
 0x03 0x78 0xe0 0x2f
 
-# CHECK: lw k1,-15155(at)
+# CHECK: lw $27, -15155($at)
 0x8c 0x3b 0xc4 0xcd
 
-# CHECK: lui at,0x1
+# CHECK: lui $at, 1
 0x3c 0x01 0x00 0x01
 
-# CHECK: lwu v1,-1746(v1)
+# CHECK: lwu $3, -1746($3)
 0x9c 0x63 0xf9 0x2e
 
-# CHECK: lui ra,0x1
+# CHECK: lui $ra, 1
 0x3c 0x1f 0x00 0x01
 
-# CHECK: sw k0,-15159(at)
+# CHECK: sw $26, -15159($at)
 0xac 0x3a 0xc4 0xc9
 
-# CHECK: ld k0,3958(zero)
+# CHECK: ld $26, 3958($zero)
 0xdc 0x1a 0x0f 0x76
 
-# CHECK: sd a2,17767(zero)
+# CHECK: sd $6, 17767($zero)
 0xfc 0x06 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index dd87522..c4e5591 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux
-
-# CHECK: daddiu t3,k0,31949
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
-# CHECK: daddu k0,at,t3
+# CHECK: daddu $26, $at, $11
 0x2d 0xd0 0x2b 0x00
 
-# CHECK: ddiv zero,k0,s6
+# CHECK: ddiv $zero, $26, $22
 0x1e 0x00 0x56 0x03
 
-# CHECK: ddivu zero,t1,t8
+# CHECK: ddivu $zero, $9, $24
 0x1f 0x00 0x38 0x01
 
-# CHECK: dmfc1 v0,$f14
+# CHECK: dmfc1 $2, $f14
 0x00 0x70 0x22 0x44
 
-# CHECK: dmtc1 s7,$f5
+# CHECK: dmtc1 $23, $f5
 0x00 0x28 0xb7 0x44
 
-# CHECK: dmult t3,k0
+# CHECK: dmult $11, $26
 0x1c 0x00 0x7a 0x01
 
-# CHECK: dmultu s7,t5
+# CHECK: dmultu $23, $13
 0x1d 0x00 0xed 0x02
 
-# CHECK: dsll v1,t8,0x11
+# CHECK: dsll $3, $24, 17
 0x78 0x1c 0x18 0x00
 
-# CHECK: dsllv gp,k1,t8
+# CHECK: dsllv $gp, $27, $24
 0x14 0xe0 0x1b 0x03
 
-# CHECK: dsra at,at,0x1e
+# CHECK: dsra $at, $at, 30
 0xbb 0x0f 0x01 0x00
 
-# CHECK: dsrav at,at,s8
+# CHECK: dsrav $at, $at, $fp
 0x17 0x08 0xc1 0x03
 
-# CHECK: dsrl t2,gp,0x18
+# CHECK: dsrl $10, $gp, 24
 0x3a 0x56 0x1c 0x00
 
-# CHECK: dsrlv gp,t2,s7
+# CHECK: dsrlv $gp, $10, $23
 0x16 0xe0 0xea 0x02
 
-# CHECK: dsubu gp,k1,t8
+# CHECK: dsubu $gp, $27, $24
 0x2f 0xe0 0x78 0x03
 
-# CHECK: lw k1,-15155(at)
+# CHECK: lw $27, -15155($at)
 0xcd 0xc4 0x3b 0x8c
 
-# CHECK: lui at,0x1
+# CHECK: lui $at, 1
 0x01 0x00 0x01 0x3c
 
-# CHECK: lwu v1,-1746(v1)
+# CHECK: lwu $3, -1746($3)
 0x2e 0xf9 0x63 0x9c
 
-# CHECK: lui ra,0x1
+# CHECK: lui $ra, 1
 0x01 0x00 0x1f 0x3c
 
-# CHECK: sw k0,-15159(at)
+# CHECK: sw $26, -15159($at)
 0xc9 0xc4 0x3a 0xac
 
-# CHECK: ld k0,3958(zero)
+# CHECK: ld $26, 3958($zero)
 0x76 0x0f 0x1a 0xdc
 
-# CHECK: sd a2,17767(zero)
+# CHECK: sd $6, 17767($zero)
 0x67 0x45 0x06 0xfc
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
index 26bc94d..41808c7 100644
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2
-
-# CHECK: daddiu t3,k0,31949
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
-# CHECK: daddu k0,at,t3
+# CHECK: daddu $26, $at, $11
 0x00 0x2b 0xd0 0x2d
 
-# CHECK: ddiv zero,k0,s6
+# CHECK: ddiv $zero, $26, $22
 0x03 0x56 0x00 0x1e
 
-# CHECK: ddivu zero,t1,t8
+# CHECK: ddivu $zero, $9, $24
 0x01 0x38 0x00 0x1f
 
-# CHECK: dmfc1 v0,$f14
+# CHECK: dmfc1 $2, $f14
 0x44 0x22 0x70 0x00
 
-# CHECK: dmtc1 s7,$f5
+# CHECK: dmtc1 $23, $f5
 0x44 0xb7 0x28 0x00
 
-# CHECK: dmult t3,k0
+# CHECK: dmult $11, $26
 0x01 0x7a 0x00 0x1c
 
-# CHECK: dmultu s7,t5
+# CHECK: dmultu $23, $13
 0x02 0xed 0x00 0x1d
 
-# CHECK: dsll v1,t8,0x11
+# CHECK: dsll $3, $24, 17
 0x00 0x18 0x1c 0x78
 
-# CHECK: dsllv gp,k1,t8
+# CHECK: dsllv $gp, $27, $24
 0x03 0x1b 0xe0 0x14
 
-# CHECK: dsra at,at,0x1e
+# CHECK: dsra $at, $at, 30
 0x00 0x01 0x0f 0xbb
 
-# CHECK: dsrav at,at,s8
+# CHECK: dsrav $at, $at, $fp
 0x03 0xc1 0x08 0x17
 
-# CHECK: dsrl t2,gp,0x18
+# CHECK: dsrl $10, $gp, 24
 0x00 0x1c 0x56 0x3a
 
-# CHECK: dsrlv gp,t2,s7
+# CHECK: dsrlv $gp, $10, $23
 0x02 0xea 0xe0 0x16
 
-# CHECK: dsubu gp,k1,t8
+# CHECK: dsubu $gp, $27, $24
 0x03 0x78 0xe0 0x2f
 
-# CHECK: lw k1,-15155(at)
+# CHECK: lw $27, -15155($at)
 0x8c 0x3b 0xc4 0xcd
 
-# CHECK: lui at,0x1
+# CHECK: lui $at, 1
 0x3c 0x01 0x00 0x01
 
-# CHECK: lwu v1,-1746(v1)
+# CHECK: lwu $3, -1746($3)
 0x9c 0x63 0xf9 0x2e
 
-# CHECK: lui ra,0x1
+# CHECK: lui $ra, 1
 0x3c 0x1f 0x00 0x01
 
-# CHECK: sw k0,-15159(at)
+# CHECK: sw $26, -15159($at)
 0xac 0x3a 0xc4 0xc9
 
-# CHECK: ld k0,3958(zero)
+# CHECK: ld $26, 3958($zero)
 0xdc 0x1a 0x0f 0x76
 
-# CHECK: sd a2,17767(zero)
+# CHECK: sd $6, 17767($zero)
 0xfc 0x06 0x45 0x67
 
-# CHECK: dclo t1,t8
+# CHECK: dclo $9, $24
 0x73 0x09 0x48 0x25
 
-# CHECK: dclz k0,t1
+# CHECK: dclz $26, $9
 0x71 0x3a 0xd0 0x24
 
-# CHECK: dext a3,gp,0x1d,0x1f
+# CHECK: dext $7, $gp, 29, 31
 0x7f 0x87 0xf7 0x43
 
-# CHECK: dins s4,gp,0xf,0x1
+# CHECK: dins $20, $gp, 15, 1
 0x7f 0x94 0x7b 0xc7
 
-# CHECK: dsbh a3,gp
+# CHECK: dsbh $7, $gp
 0x7c 0x1c 0x38 0xa4
 
-# CHECK: dshd v1,t6
+# CHECK: dshd $3, $14
 0x7c 0x0e 0x19 0x64
 
-# CHECK: drotr s4,k1,0x6
+# CHECK: drotr $20, $27, 6
 0x00 0x3b 0xa1 0xba
 
-# CHECK: drotrv t8,s7,a1
+# CHECK: drotrv $24, $23, $5
 0x00 0xb7 0xc0 0x56
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
index 81a7c66..4987f80 100644
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips64r2_le.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2
-
-# CHECK: daddiu t3,k0,31949
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section        __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
-# CHECK: daddu k0,at,t3
+# CHECK: daddu $26, $at, $11
 0x2d 0xd0 0x2b 0x00
 
-# CHECK: ddiv zero,k0,s6
+# CHECK: ddiv $zero, $26, $22
 0x1e 0x00 0x56 0x03
 
-# CHECK: ddivu zero,t1,t8
+# CHECK: ddivu $zero, $9, $24
 0x1f 0x00 0x38 0x01
 
-# CHECK: dmfc1 v0,$f14
+# CHECK: dmfc1 $2, $f14
 0x00 0x70 0x22 0x44
 
-# CHECK: dmtc1 s7,$f5
+# CHECK: dmtc1 $23, $f5
 0x00 0x28 0xb7 0x44
 
-# CHECK: dmult t3,k0
+# CHECK: dmult $11, $26
 0x1c 0x00 0x7a 0x01
 
-# CHECK: dmultu s7,t5
+# CHECK: dmultu $23, $13
 0x1d 0x00 0xed 0x02
 
-# CHECK: dsll v1,t8,0x11
+# CHECK: dsll $3, $24, 17
 0x78 0x1c 0x18 0x00
 
-# CHECK: dsllv gp,k1,t8
+# CHECK: dsllv $gp, $27, $24
 0x14 0xe0 0x1b 0x03
 
-# CHECK: dsra at,at,0x1e
+# CHECK: dsra $at, $at, 30
 0xbb 0x0f 0x01 0x00
 
-# CHECK: dsrav at,at,s8
+# CHECK: dsrav $at, $at, $fp
 0x17 0x08 0xc1 0x03
 
-# CHECK: dsrl t2,gp,0x18
+# CHECK: dsrl $10, $gp, 24
 0x3a 0x56 0x1c 0x00
 
-# CHECK: dsrlv gp,t2,s7
+# CHECK: dsrlv $gp, $10, $23
 0x16 0xe0 0xea 0x02
 
-# CHECK: dsubu gp,k1,t8
+# CHECK: dsubu $gp, $27, $24
 0x2f 0xe0 0x78 0x03
 
-# CHECK: lw k1,-15155(at)
+# CHECK: lw $27, -15155($at)
 0xcd 0xc4 0x3b 0x8c
 
-# CHECK: lui at,0x1
+# CHECK: lui $at, 1
 0x01 0x00 0x01 0x3c
 
-# CHECK: lwu v1,-1746(v1)
+# CHECK: lwu $3, -1746($3)
 0x2e 0xf9 0x63 0x9c
 
-# CHECK: lui ra,0x1
+# CHECK: lui $ra, 1
 0x01 0x00 0x1f 0x3c
 
-# CHECK: sw k0,-15159(at)
+# CHECK: sw $26, -15159($at)
 0xc9 0xc4 0x3a 0xac
 
-# CHECK: ld k0,3958(zero)
+# CHECK: ld $26, 3958($zero)
 0x76 0x0f 0x1a 0xdc
 
-# CHECK: sd a2,17767(zero)
+# CHECK: sd $6, 17767($zero)
 0x67 0x45 0x06 0xfc
 
-# CHECK: dclo t1,t8
+# CHECK: dclo $9, $24
 0x25 0x48 0x09 0x73
 
-# CHECK: dclz k0,t1
+# CHECK: dclz $26, $9
 0x24 0xd0 0x3a 0x71
 
-# CHECK: dext a3,gp,0x1d,0x1f
+# CHECK: dext $7, $gp, 29, 31
 0x43 0xf7 0x87 0x7f
 
-# CHECK: dins s4,gp,0xf,0x1
+# CHECK: dins $20, $gp, 15, 1
 0xc7 0x7b 0x94 0x7f
 
-# CHECK: dsbh a3,gp
+# CHECK: dsbh $7, $gp
 0xa4 0x38 0x1c 0x7c
 
-# CHECK: dshd v1,t6
+# CHECK: dshd $3, $14
 0x64 0x19 0x0e 0x7c
 
-# CHECK: drotr s4,k1,0x6
+# CHECK: drotr $20, $27, 6
 0xba 0xa1 0x3b 0x00
 
-# CHECK: drotrv t8,s7,a1
+# CHECK: drotrv $24, $23, $5
 0x56 0xc0 0xb7 0x00
diff --git a/test/MC/Disassembler/X86/enhanced.txt b/test/MC/Disassembler/X86/enhanced.txt
index 752ab17..deff735 100644
--- a/test/MC/Disassembler/X86/enhanced.txt
+++ b/test/MC/Disassembler/X86/enhanced.txt
@@ -1,10 +1,10 @@
-# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
 
-# CHECK: [o:jne][w:	][0-p:-][0-l:10=10] <br> 0:[RIP/111](pc)=18446744073709551606
+# CHECK: [o:jne][w:	][0-p:-][0-l:10=10] <br> 0:[RIP/112](pc)=18446744073709551606
 0x0f 0x85 0xf6 0xff 0xff 0xff
-# CHECK: [o:movq][w:	][1-r:%gs=r63][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r108] <mov> 0:[RCX/108]=0 1:[GS/63]=8
+# CHECK: [o:movq][w:	][1-r:%gs=r64][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r109] <mov> 0:[RCX/109]=0 1:[GS/64]=8
 0x65 0x48 0x8b 0x0c 0x25 0x08 0x00 0x00 0x00
-# CHECK: [o:xorps][w:	][2-r:%xmm1=r129][p:,][w: ][0-r:%xmm2=r130] 0:[XMM2/130]=0 1:[XMM2/130]=0 2:[XMM1/129]=0
+# CHECK: [o:xorps][w:	][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
 0x0f 0x57 0xd1
-# CHECK: [o:andps][w:	][2-r:%xmm1=r129][p:,][w: ][0-r:%xmm2=r130] 0:[XMM2/130]=0 1:[XMM2/130]=0 2:[XMM1/129]=0
+# CHECK: [o:andps][w:	][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
 0x0f 0x54 0xd1
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index a5dbcf2..27694cd 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -105,3 +105,8 @@
 # CHECK: retf
 0x66 0xcb
 
+# CHECK: vpgatherqq YMM2, QWORD PTR [RDI + 2*YMM1], YMM0
+0xc4 0xe2 0xfd 0x91 0x14 0x4f
+
+# CHECK: vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+0xc4 0x02 0x39 0x90 0x14 0x4f
diff --git a/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt b/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt
index 9feb54c..31a3804 100644
--- a/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt
+++ b/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # This instruction would decode as movmskps if the vvvv field in the VEX prefix was all 1s.
 0xc5 0xf0 0x50 0xc0
diff --git a/test/MC/Disassembler/X86/invalid-cmp-imm.txt b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
index bf8699b..7b2ea2a 100644
--- a/test/MC/Disassembler/X86/invalid-cmp-imm.txt
+++ b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | grep "invalid instruction encoding"
 
 # This instruction would decode as cmpordps if the immediate byte was less than 8.
 0x0f 0xc2 0xc7 0x08
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index c0e77d06..672d239 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -123,10 +123,10 @@
 # CHECK: vcvtss2sil %xmm0, %eax
 0xc5 0xfa 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc5 0xfb 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %rax
+# CHECK: vcvtsd2siq %xmm0, %rax
 0xc4 0xe1 0xfb 0x2d 0xc0
 
 # CHECK: vmaskmovpd %xmm0, %xmm1, (%rax)
@@ -437,10 +437,10 @@
 # CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
 0xc4 0xe3 0x7d 0x0b 0xc0 0x00
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc4 0xe1 0x7f 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %rax
+# CHECK: vcvtsd2siq %xmm0, %rax
 0xc4 0xe1 0xff 0x2d 0xc0
 
 # CHECK: vucomisd %xmm1, %xmm0
@@ -725,6 +725,30 @@
 # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
 
+# CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+0xc4 0xe2 0xf9 0x92 0x14 0x4f
+
+# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+0xc4 0xe2 0xfd 0x92 0x14 0x4f
+
+# CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+0xc4 0x02 0x39 0x93 0x14 0x4f
+
+# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
+0xc4 0x02 0x3d 0x93 0x14 0x4f
+
+# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+0xc4 0xe2 0xf9 0x90 0x14 0x4f
+
+# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+0xc4 0xe2 0xfd 0x90 0x14 0x4f
+
+# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+0xc4 0x02 0x39 0x91 0x14 0x4f
+
+# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+0xc4 0x02 0x3d 0x91 0x14 0x4f
+
 # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
 # CHECK: lock
 # CHECK-NEXT: xaddq	%rcx, %rbx
diff --git a/test/MC/Disassembler/X86/truncated-input.txt b/test/MC/Disassembler/X86/truncated-input.txt
index 34cf038..83be1ca 100644
--- a/test/MC/Disassembler/X86/truncated-input.txt
+++ b/test/MC/Disassembler/X86/truncated-input.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
 
 # CHECK: warning
 0x00
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 739fa6a..899657b 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -159,10 +159,10 @@
 # CHECK: vcvtss2sil %xmm0, %eax
 0xc5 0xfa 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc5 0xfb 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc4 0xe1 0x7b 0x2d 0xc0
 
 # CHECK: vmaskmovpd %xmm0, %xmm1, (%eax)
@@ -460,10 +460,10 @@
 # CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
 0xc4 0xe3 0x7d 0x0b 0xc0 0x00
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc4 0xe1 0x7f 0x2d 0xc0
 
-# CHECK: vcvtsd2si %xmm0, %eax
+# CHECK: vcvtsd2sil %xmm0, %eax
 0xc4 0xe1 0xff 0x2d 0xc0
 
 # CHECK: vucomisd %xmm1, %xmm0
@@ -612,3 +612,21 @@
 
 # CHECK: shrxl %esi, %ebx, %edx
 0xc4 0xe2 0x0b 0xf7 0xd3
+
+# CHECK: extrq  $2, $3, %xmm0
+0x66 0x0f 0x78 0xc0 0x03 0x02
+
+# CHECK: extrq  %xmm1, %xmm0
+0x66 0x0f 0x79 0xc1
+
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+0xf2 0x0f 0x78 0xc1 0x05 0x06
+
+# CHECK: insertq %xmm1, %xmm0
+0xf2 0x0f 0x79 0xc1
+
+# CHECK: movntsd %xmm0, (%edi)
+0xf2 0x0f 0x2b 0x07
+
+# CHECK: movntss %xmm0, (%edi)
+0xf3 0x0f 0x2b 0x07
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index f4b8f46..df449a4 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -61,3 +61,21 @@
 
 # CHECK: cmpordsd 
 0xf2 0x0f 0xc2 0xc7 0x07
+
+# CHECK: extrq  $2, $3, %xmm0
+0x66 0x0f 0x78 0xc0 0x03 0x02
+
+# CHECK: extrq  %xmm1, %xmm0
+0x66 0x0f 0x79 0xc1
+
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+0xf2 0x0f 0x78 0xc1 0x05 0x06
+
+# CHECK: insertq %xmm1, %xmm0
+0xf2 0x0f 0x79 0xc1
+
+# CHECK: movntsd %xmm0, (%rdi)
+0xf2 0x0f 0x2b 0x07
+
+# CHECK: movntss %xmm0, (%rdi)
+0xf3 0x0f 0x2b 0x07
diff --git a/test/MC/ELF/fde.s b/test/MC/ELF/fde.s
new file mode 100644
index 0000000..52ee33f
--- /dev/null
+++ b/test/MC/ELF/fde.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -filetype=obj %s -o %t.o -triple x86_64-pc-linux-gnu && llvm-objdump -s %t.o
+# PR13581
+
+# CHECK: Contents of section .debug_frame:
+# CHECK-NEXT:  0000 14000000 ffffffff 01000178 100c0708  ...........x....
+# CHECK-NEXT:  0010 90010000 00000000 1c000000 00000000  ................
+# CHECK-NEXT:  0020 00000000 00000000 11000000 00000000  ................
+# CHECK-NEXT:  0030 410e1086 02430d06                    A....C..
+
+__cxx_global_var_init:                  # @__cxx_global_var_init
+        .cfi_startproc
+.Lfunc_begin0:
+# BB#0:                                 # %entry
+        pushq   %rbp
+.Ltmp2:
+        .cfi_def_cfa_offset 16
+.Ltmp3:
+        .cfi_offset %rbp, -16
+        movq    %rsp, %rbp
+.Ltmp4:
+        .cfi_def_cfa_register %rbp
+.Ltmp5:
+        callq   _Z2rsv@PLT
+        movl    %eax, _ZL1i(%rip)
+        popq    %rbp
+        ret
+        .cfi_endproc
+        .cfi_sections .debug_frame
diff --git a/test/MC/ELF/version.s b/test/MC/ELF/version.s
new file mode 100644
index 0000000..31e952a
--- /dev/null
+++ b/test/MC/ELF/version.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck  %s
+
+.version "1234"
+.version "123"
+
+// CHECK:       (('sh_name', 0x0000000c) # '.note'
+// CHECK-NEXT:   ('sh_type', 0x00000007)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000034)
+// CHECK-NEXT:   ('sh_size', 0x00000024)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:   ('_section_data', '05000000 00000000 01000000 31323334 00000000 04000000 00000000 01000000 31323300')
+// CHECK-NEXT:  ),
diff --git a/test/MC/MachO/ARM/data-in-code.s b/test/MC/MachO/ARM/data-in-code.s
new file mode 100644
index 0000000..bbcb9aa
--- /dev/null
+++ b/test/MC/MachO/ARM/data-in-code.s
@@ -0,0 +1,33 @@
+@ RUN: llvm-mc -triple armv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+        .text
+_foo:
+@ CHECK: # DICE 0
+@ CHECK: ('offset', 0)
+@ CHECK: ('length', 4)
+@ CHECK: ('kind', 1)
+@ CHECK: # DICE 1
+@ CHECK: ('offset', 4)
+@ CHECK: ('length', 4)
+@ CHECK: ('kind', 4)
+@ CHECK: # DICE 2
+@ CHECK: ('offset', 8)
+@ CHECK: ('length', 2)
+@ CHECK: ('kind', 3)
+@ CHECK: # DICE 3
+@ CHECK: ('offset', 10)
+@ CHECK: ('length', 1)
+@ CHECK: ('kind', 2)
+
+.data_region
+        .long 10
+.end_data_region
+.data_region jt32
+        .long 1
+.end_data_region
+.data_region jt16
+        .short 2
+.end_data_region
+.data_region jt8
+        .byte 3
+.end_data_region
+
diff --git a/test/MC/MachO/ARM/llvm-objdump-macho-stripped.s b/test/MC/MachO/ARM/llvm-objdump-macho-stripped.s
new file mode 100644
index 0000000..7fcec52
--- /dev/null
+++ b/test/MC/MachO/ARM/llvm-objdump-macho-stripped.s
@@ -0,0 +1,5 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-ios -filetype=obj -o - < %s | llvm-objdump -d -macho -triple=thumbv7-apple-ios - | FileCheck %s
+	nop
+# CHECK:        0:	00 bf                                        	nop
+# We are checking that disassembly happens when there are no symbols.
+# rdar://11460289
diff --git a/test/MC/MachO/ARM/llvm-objdump-macho.s b/test/MC/MachO/ARM/llvm-objdump-macho.s
new file mode 100644
index 0000000..c8aec93
--- /dev/null
+++ b/test/MC/MachO/ARM/llvm-objdump-macho.s
@@ -0,0 +1,20 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-ios -filetype=obj -o - < %s | llvm-objdump -d -macho -triple=thumbv7-apple-ios - | FileCheck %s
+.thumb
+.thumb_func _fib
+_fib:
+	push	{r7, lr}
+	pop	{r7, pc}
+.thumb_func _main
+_main:
+	push	{r7, lr}
+        pop	{r7, pc}
+	nop
+# CHECK: _fib:
+# CHECK:        0:	80 b5                                        	push	{r7, lr}
+# CHECK:        2:	80 bd                                        	pop	{r7, pc}
+# CHECK: _main:
+# CHECK:        4:	80 b5                                        	push	{r7, lr}
+# CHECK:        6:	80 bd                                        	pop	{r7, pc}
+# CHECK:        8:	00 bf                                        	nop
+# We are checking that second function is fully disassembled.
+# rdar://11426465
diff --git a/test/MC/MachO/ARM/thumb-bl-jbits.s b/test/MC/MachO/ARM/thumb-bl-jbits.s
new file mode 100644
index 0000000..9657968
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb-bl-jbits.s
@@ -0,0 +1,19 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -filetype=obj -o - < %s | macho-dump --dump-section-data | FileCheck %s
+.thumb
+.thumb_func t
+t:	nop
+
+.data
+.space 4441096 - 4 - 2
+
+.section __TEXT, __branch, regular, pure_instructions
+.thumb
+.thumb_func b
+b:
+	bl	t
+# CHECK: '_section_data', 'c3f7fcf5'
+# We are checking that the branch and link instruction which is:
+#	bl	#-4441096
+# has it displacement encoded correctly with respect to the J1 and J2 bits when
+# the branch is assembled with a label not a displacement.
+# rdar://10149689
diff --git a/test/MC/MachO/ARM/thumb2-movw-fixup.s b/test/MC/MachO/ARM/thumb2-movw-fixup.s
new file mode 100644
index 0000000..57973a8
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb2-movw-fixup.s
@@ -0,0 +1,44 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+
+@ rdar://10038370
+
+	.syntax unified
+  .text
+	.align	2
+	.code	16           
+	.thumb_func	_foo
+  movw	r2, :lower16:L1
+	movt	r2, :upper16:L1
+  movw	r12, :lower16:L2
+	movt	r12, :upper16:L2
+  .space 70000
+  
+  .data
+L1: .long 0
+L2: .long 0
+
+@ CHECK:  ('_relocations', [
+@ CHECK:    # Relocation 0
+@ CHECK:    (('word-0', 0xc),
+@ CHECK:     ('word-1', 0x86000002)),
+@ CHECK:    # Relocation 1
+@ CHECK:    (('word-0', 0x1184),
+@ CHECK:     ('word-1', 0x16ffffff)),
+@ CHECK:    # Relocation 2
+@ CHECK:    (('word-0', 0x8),
+@ CHECK:     ('word-1', 0x84000002)),
+@ CHECK:    # Relocation 3
+@ CHECK:    (('word-0', 0x1),
+@ CHECK:     ('word-1', 0x14ffffff)),
+@ CHECK:    # Relocation 4
+@ CHECK:    (('word-0', 0x4),
+@ CHECK:     ('word-1', 0x86000002)),
+@ CHECK:    # Relocation 5
+@ CHECK:    (('word-0', 0x1180),
+@ CHECK:     ('word-1', 0x16ffffff)),
+@ CHECK:    # Relocation 6
+@ CHECK:    (('word-0', 0x0),
+@ CHECK:     ('word-1', 0x84000002)),
+@ CHECK:    # Relocation 7
+@ CHECK:    (('word-0', 0x1),
+@ CHECK:     ('word-1', 0x14ffffff)),
diff --git a/test/MC/MachO/previous.s b/test/MC/MachO/previous.s
new file mode 100644
index 0000000..41077cd
--- /dev/null
+++ b/test/MC/MachO/previous.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -o - | FileCheck %s
+
+.text
+// CHECK: .section __TEXT,__text
+
+.data
+// CHECK: .section __DATA,__data
+
+.previous
+// CHECK: .section __TEXT,__text
+
+.previous
+// CHECK: .section __DATA,__data
diff --git a/test/MC/MachO/pushsection.s b/test/MC/MachO/pushsection.s
new file mode 100644
index 0000000..6881323
--- /dev/null
+++ b/test/MC/MachO/pushsection.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -o - | FileCheck %s
+
+.text
+// CHECK: .section __TEXT,__text
+
+.pushsection __DATA, __data
+// CHECK: .section __DATA,__data
+
+.pushsection __TEXT, initcode
+// CHECK: .section __TEXT,initcode
+        
+.popsection
+// CHECK: .section __DATA,__data
+        
+.popsection
+// CHECK: .section __TEXT,__text
diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll
new file mode 100644
index 0000000..23ec53a
--- /dev/null
+++ b/test/MC/Mips/elf-N64.ll
@@ -0,0 +1,39 @@
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check for N64 relocation production.
+;
+; ModuleID = '../hello.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v64:64:64-n32"
+target triple = "mips64el-unknown-linux"
+
+@str = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+define i32 @main() nounwind {
+entry:
+; Check that the appropriate relocations were created.
+
+; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+; CHECK:     ('r_type3', 0x05)
+; CHECK-NEXT:     ('r_type2', 0x18)
+; CHECK-NEXT:     ('r_type', 0x07)
+
+; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+; CHECK:     ('r_type3', 0x06)
+; CHECK-NEXT:     ('r_type2', 0x18)
+; CHECK-NEXT:     ('r_type', 0x07)
+
+; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
+; CHECK:     ('r_type3', 0x00)
+; CHECK-NEXT:     ('r_type2', 0x00)
+; CHECK-NEXT:     ('r_type', 0x14)
+
+; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
+; CHECK:     ('r_type3', 0x00)
+; CHECK-NEXT:     ('r_type2', 0x00)
+; CHECK-NEXT:     ('r_type', 0x15)
+
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
+  ret i32 0
+
+}
+declare i32 @puts(i8* nocapture) nounwind
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
index 71c69bb..7111deb 100644
--- a/test/MC/Mips/elf-bigendian.ll
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -1,4 +1,6 @@
-; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLE: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: false
+; XFAIL: *
 
 ; Check that this is big endian.
 ; CHECK: ('e_indent[EI_DATA]', 0x02)
diff --git a/test/MC/Mips/elf-objdump.s b/test/MC/Mips/elf-objdump.s
new file mode 100644
index 0000000..6a5c2a5
--- /dev/null
+++ b/test/MC/Mips/elf-objdump.s
@@ -0,0 +1,11 @@
+// 32 bit big endian
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux  - | FileCheck %s
+// 32 bit little endian
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux  - | FileCheck %s
+// 64 bit big endian
+// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s
+// 64 bit little endian
+// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s
+
+// We just want to see if llvm-objdump works at all.
+// CHECK: .text
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
index 7a79fa0..ffc3b11 100644
--- a/test/MC/Mips/elf_basic.s
+++ b/test/MC/Mips/elf_basic.s
@@ -30,3 +30,6 @@
 // CHECK-LE64: ('e_indent[EI_CLASS]', 0x02)
 // This is little endian.
 // CHECK-LE64: ('e_indent[EI_DATA]', 0x01)
+
+// Check that we are setting EI_OSABI to ELFOSABI_LINUX.
+// CHECK-LE64: ('e_indent[EI_OSABI]', 0x03)
diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll
new file mode 100644
index 0000000..81a89e3
--- /dev/null
+++ b/test/MC/Mips/higher_highest.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created.
+
+; CHECK:     ('r_type', 0x1d)
+; CHECK:     ('r_type', 0x1d)
+; CHECK:     ('r_type', 0x1c)
+; CHECK:     ('r_type', 0x1c)
+
+@g0 = external global i32
+
+define void @foo1(i32 %s) nounwind {
+entry:
+
+  %tobool = icmp eq i32 %s, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* @g0, align 4
+  %add = add nsw i32 %0, 12
+  store i32 %add, i32* @g0, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
diff --git a/test/MC/Mips/lea_64.ll b/test/MC/Mips/lea_64.ll
new file mode 100644
index 0000000..2e7a37b
--- /dev/null
+++ b/test/MC/Mips/lea_64.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple mips64el - \
+; RUN:  | FileCheck %s
+
+@p = external global i32*
+
+define void @f1() nounwind {
+entry:
+; CHECK: .text:
+; CHECK-NOT: addiu {{[0-9,a-f]+}}, {{[0-9,a-f]+}}, {{[0-9]+}}
+
+  %a = alloca [10 x i32], align 4
+  %arraydecay = getelementptr inbounds [10 x i32]* %a, i64 0, i64 0
+  store i32* %arraydecay, i32** @p, align 8
+  ret void
+
+; CHECK: jr $ra
+}
diff --git a/test/MC/Mips/mips64shift.ll b/test/MC/Mips/mips64shift.ll
new file mode 100644
index 0000000..7817b96
--- /dev/null
+++ b/test/MC/Mips/mips64shift.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - | llvm-objdump -disassemble -triple mips64el - | FileCheck %s
+
+
+define i64 @f3(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 10
+  %shl = shl i64 %a0, 10
+  ret i64 %shl
+}
+
+define i64 @f4(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 10
+  %shr = ashr i64 %a0, 10
+  ret i64 %shr
+}
+
+define i64 @f5(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 10
+  %shr = lshr i64 %a0, 10
+  ret i64 %shr
+}
+
+define i64 @f6(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+  %shl = shl i64 %a0, 40
+  ret i64 %shl
+}
+
+define i64 @f7(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+  %shr = ashr i64 %a0, 40
+  ret i64 %shr
+}
+
+define i64 @f8(i64 %a0) nounwind readnone {
+entry:
+; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+  %shr = lshr i64 %a0, 40
+  ret i64 %shr
+}
+
diff --git a/test/MC/Mips/multi-64bit-func.ll b/test/MC/Mips/multi-64bit-func.ll
new file mode 100644
index 0000000..6e0d784
--- /dev/null
+++ b/test/MC/Mips/multi-64bit-func.ll
@@ -0,0 +1,23 @@
+; There is no real check here. If the test doesn't 
+; assert it passes.
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s 
+; Run it again without extra nop in delay slot
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -enable-mips-delay-filler < %s 
+
+define i32 @bosco1(i32 %x) nounwind readnone {
+entry:
+  %inc = add i32 %x, 1
+  ret i32 %inc
+}
+
+define i32 @bosco2(i32 %x) nounwind readnone {
+entry:
+  %inc = add i32 %x, 1
+  ret i32 %inc
+}
+
+define i32 @bosco3(i32 %x) nounwind readnone {
+entry:
+  %inc = add i32 %x, 1
+  ret i32 %inc
+}
diff --git a/test/MC/Mips/r-mips-got-disp.ll b/test/MC/Mips/r-mips-got-disp.ll
new file mode 100644
index 0000000..73396ac
--- /dev/null
+++ b/test/MC/Mips/r-mips-got-disp.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that the R_MIPS_GOT_DISP relocations were created.
+
+; CHECK:     ('r_type', 0x13)
+
+@shl = global i64 1, align 8
+@.str = private unnamed_addr constant [8 x i8] c"0x%llx\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i64* @shl, align 8
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i64 %0) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
diff --git a/test/MC/Mips/sext_64_32.ll b/test/MC/Mips/sext_64_32.ll
new file mode 100644
index 0000000..e5c57b8
--- /dev/null
+++ b/test/MC/Mips/sext_64_32.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - | llvm-objdump -disassemble -triple mips64el - | FileCheck %s
+
+; Sign extend from 32 to 64 was creating nonsense opcodes
+
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+
+define i64 @foo(i32 %ival) nounwind readnone {
+entry:
+  %conv = sext i32 %ival to i64
+  ret i64 %conv
+}
+
+; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 0
+
+define i64 @foo_2(i32 %ival_2) nounwind readnone {
+entry:
+  %conv_2 = zext i32 %ival_2 to i64
+  ret i64 %conv_2
+}
+
diff --git a/test/MC/Mips/sym-offset.ll b/test/MC/Mips/sym-offset.ll
index 5939935..5162c91 100644
--- a/test/MC/Mips/sym-offset.ll
+++ b/test/MC/Mips/sym-offset.ll
@@ -1,4 +1,6 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLED: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: false
+; XFAIL: *
 
 ; FIXME: use assembler instead of llc when it becomes available.
 
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 7cd5677..7edd26a 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -63,4 +63,6 @@ _main:
         mov     ECX, DWORD PTR [4*ECX + _fnan]
 // CHECK:       movq    %fs:320, %rax
         mov     RAX, QWORD PTR FS:[320]
+// CHECK:       vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1
+        vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
 	ret
diff --git a/test/MC/X86/x86-32-avx.s b/test/MC/X86/x86-32-avx.s
index e13a871..586f3fe 100644
--- a/test/MC/X86/x86-32-avx.s
+++ b/test/MC/X86/x86-32-avx.s
@@ -2603,11 +2603,11 @@
 // CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
           vcvttpd2dq  %xmm1, %xmm5
 
-// CHECK: vcvttpd2dq  %ymm2, %xmm5
+// CHECK: vcvttpd2dqy %ymm2, %xmm5
 // CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
           vcvttpd2dq  %ymm2, %xmm5
 
-// CHECK: vcvttpd2dqx  %xmm1, %xmm5
+// CHECK: vcvttpd2dq   %xmm1, %xmm5
 // CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
           vcvttpd2dqx  %xmm1, %xmm5
 
@@ -2623,11 +2623,11 @@
 // CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
           vcvttpd2dqy  (%eax), %xmm1
 
-// CHECK: vcvtpd2ps  %ymm2, %xmm5
+// CHECK: vcvtpd2psy %ymm2, %xmm5
 // CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
           vcvtpd2ps  %ymm2, %xmm5
 
-// CHECK: vcvtpd2psx  %xmm1, %xmm5
+// CHECK: vcvtpd2ps   %xmm1, %xmm5
 // CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
           vcvtpd2psx  %xmm1, %xmm5
 
@@ -2643,7 +2643,7 @@
 // CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
           vcvtpd2psy  (%eax), %xmm1
 
-// CHECK: vcvtpd2dq  %ymm2, %xmm5
+// CHECK: vcvtpd2dqy %ymm2, %xmm5
 // CHECK: encoding: [0xc5,0xff,0xe6,0xea]
           vcvtpd2dq  %ymm2, %xmm5
 
@@ -2655,7 +2655,7 @@
 // CHECK: encoding: [0xc5,0xff,0xe6,0x08]
           vcvtpd2dqy  (%eax), %xmm1
 
-// CHECK: vcvtpd2dqx  %xmm1, %xmm5
+// CHECK: vcvtpd2dq   %xmm1, %xmm5
 // CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
           vcvtpd2dqx  %xmm1, %xmm5
 
@@ -3103,21 +3103,21 @@
 // CHECK: encoding: [0xc5,0xf8,0x77]
           vzeroupper
 
-// CHECK: vcvtsd2si  %xmm4, %ecx
+// CHECK: vcvtsd2sil  %xmm4, %ecx
 // CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
-          vcvtsd2si  %xmm4, %ecx
+          vcvtsd2sil  %xmm4, %ecx
 
-// CHECK: vcvtsd2si  (%ecx), %ecx
+// CHECK: vcvtsd2sil  (%ecx), %ecx
 // CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
-          vcvtsd2si  (%ecx), %ecx
+          vcvtsd2sil  (%ecx), %ecx
 
-// CHECK: vcvtsi2sdl  (%ebp), %xmm0, %xmm7
+// CHECK: vcvtsi2sd  (%ebp), %xmm0, %xmm7
 // CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
-          vcvtsi2sdl  (%ebp), %xmm0, %xmm7
+          vcvtsi2sd  (%ebp), %xmm0, %xmm7
 
-// CHECK: vcvtsi2sdl  (%esp), %xmm0, %xmm7
+// CHECK: vcvtsi2sd  (%esp), %xmm0, %xmm7
 // CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
-          vcvtsi2sdl  (%esp), %xmm0, %xmm7
+          vcvtsi2sd  (%esp), %xmm0, %xmm7
 
 // CHECK: vlddqu  (%eax), %ymm2
 // CHECK: encoding: [0xc5,0xff,0xf0,0x10]
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 6c27b85..0824916 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -19626,3 +19626,29 @@
           dppd $0x81, %xmm2, %xmm1
 // CHECK: insertps $129, %xmm2, %xmm1
           insertps $0x81, %xmm2, %xmm1
+
+// PR13253 handle implicit optional third argument that must always be xmm0
+// CHECK: pblendvb %xmm2, %xmm1
+pblendvb %xmm2, %xmm1
+// CHECK: pblendvb %xmm2, %xmm1
+pblendvb %xmm0, %xmm2, %xmm1
+// CHECK: pblendvb (%eax), %xmm1
+pblendvb (%eax), %xmm1
+// CHECK: pblendvb (%eax), %xmm1
+pblendvb %xmm0, (%eax), %xmm1
+// CHECK: blendvpd %xmm2, %xmm1
+blendvpd %xmm2, %xmm1
+// CHECK: blendvpd %xmm2, %xmm1
+blendvpd %xmm0, %xmm2, %xmm1
+// CHECK: blendvpd (%eax), %xmm1
+blendvpd (%eax), %xmm1
+// CHECK: blendvpd (%eax), %xmm1
+blendvpd %xmm0, (%eax), %xmm1
+// CHECK: blendvps %xmm2, %xmm1
+blendvps %xmm2, %xmm1
+// CHECK: blendvps %xmm2, %xmm1
+blendvps %xmm0, %xmm2, %xmm1
+// CHECK: blendvps (%eax), %xmm1
+blendvps (%eax), %xmm1
+// CHECK: blendvps (%eax), %xmm1
+blendvps %xmm0, (%eax), %xmm1
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index bd5559a..46ff9ea 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -3368,11 +3368,11 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
           vcvttpd2dq  %xmm11, %xmm10
 
-// CHECK: vcvttpd2dq  %ymm12, %xmm10
+// CHECK: vcvttpd2dqy %ymm12, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
           vcvttpd2dq  %ymm12, %xmm10
 
-// CHECK: vcvttpd2dqx  %xmm11, %xmm10
+// CHECK: vcvttpd2dq   %xmm11, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
           vcvttpd2dqx  %xmm11, %xmm10
 
@@ -3388,11 +3388,11 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
           vcvttpd2dqy  (%rax), %xmm11
 
-// CHECK: vcvtpd2ps  %ymm12, %xmm10
+// CHECK: vcvtpd2psy %ymm12, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
           vcvtpd2ps  %ymm12, %xmm10
 
-// CHECK: vcvtpd2psx  %xmm11, %xmm10
+// CHECK: vcvtpd2ps   %xmm11, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
           vcvtpd2psx  %xmm11, %xmm10
 
@@ -3408,7 +3408,7 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
           vcvtpd2psy  (%rax), %xmm11
 
-// CHECK: vcvtpd2dq  %ymm12, %xmm10
+// CHECK: vcvtpd2dqy %ymm12, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
           vcvtpd2dq  %ymm12, %xmm10
 
@@ -3420,7 +3420,7 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
           vcvtpd2dqy  (%rax), %xmm11
 
-// CHECK: vcvtpd2dqx  %xmm11, %xmm10
+// CHECK: vcvtpd2dq   %xmm11, %xmm10
 // CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
           vcvtpd2dqx  %xmm11, %xmm10
 
@@ -3860,29 +3860,29 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
           vperm2f128  $7, (%rax), %ymm10, %ymm11
 
-// CHECK: vcvtsd2si  %xmm8, %r8d
+// CHECK: vcvtsd2sil  %xmm8, %r8d
 // CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
-          vcvtsd2si  %xmm8, %r8d
+          vcvtsd2sil  %xmm8, %r8d
 
-// CHECK: vcvtsd2si  (%rcx), %ecx
+// CHECK: vcvtsd2sil  (%rcx), %ecx
 // CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
-          vcvtsd2si  (%rcx), %ecx
+          vcvtsd2sil  (%rcx), %ecx
 
-// CHECK: vcvtss2si  %xmm4, %rcx
+// CHECK: vcvtss2siq  %xmm4, %rcx
 // CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
-          vcvtss2si  %xmm4, %rcx
+          vcvtss2siq  %xmm4, %rcx
 
-// CHECK: vcvtss2si  (%rcx), %r8
+// CHECK: vcvtss2siq  (%rcx), %r8
 // CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
-          vcvtss2si  (%rcx), %r8
+          vcvtss2siq  (%rcx), %r8
 
-// CHECK: vcvtsi2sdl  %r8d, %xmm8, %xmm15
+// CHECK: vcvtsi2sd  %r8d, %xmm8, %xmm15
 // CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
-          vcvtsi2sdl  %r8d, %xmm8, %xmm15
+          vcvtsi2sd  %r8d, %xmm8, %xmm15
 
-// CHECK: vcvtsi2sdl  (%rbp), %xmm8, %xmm15
+// CHECK: vcvtsi2sd  (%rbp), %xmm8, %xmm15
 // CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
-          vcvtsi2sdl  (%rbp), %xmm8, %xmm15
+          vcvtsi2sd  (%rbp), %xmm8, %xmm15
 
 // CHECK: vcvtsi2sdq  %rcx, %xmm4, %xmm6
 // CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
@@ -3900,21 +3900,21 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
           vcvtsi2ssq  (%rcx), %xmm4, %xmm6
 
-// CHECK: vcvttsd2si  %xmm4, %rcx
+// CHECK: vcvttsd2siq  %xmm4, %rcx
 // CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
-          vcvttsd2si  %xmm4, %rcx
+          vcvttsd2siq  %xmm4, %rcx
 
-// CHECK: vcvttsd2si  (%rcx), %rcx
+// CHECK: vcvttsd2siq  (%rcx), %rcx
 // CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
-          vcvttsd2si  (%rcx), %rcx
+          vcvttsd2siq  (%rcx), %rcx
 
-// CHECK: vcvttss2si  %xmm4, %rcx
+// CHECK: vcvttss2siq  %xmm4, %rcx
 // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
-          vcvttss2si  %xmm4, %rcx
+          vcvttss2siq  %xmm4, %rcx
 
-// CHECK: vcvttss2si  (%rcx), %rcx
+// CHECK: vcvttss2siq  (%rcx), %rcx
 // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
-          vcvttss2si  (%rcx), %rcx
+          vcvttss2siq  (%rcx), %rcx
 
 // CHECK: vlddqu  (%rax), %ymm12
 // CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
@@ -4121,3 +4121,67 @@ _foo:
 _foo2:
   nop
   vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
+
+// CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xf9,0x92,0x14,0x4f]
+          vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vgatherqpd %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xf9,0x93,0x14,0x4f]
+          vgatherqpd %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xfd,0x92,0x14,0x4f]
+          vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+
+// CHECK: vgatherqpd %ymm0, (%rdi,%ymm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xfd,0x93,0x14,0x4f]
+          vgatherqpd %ymm0, (%rdi,%ymm1,2), %ymm2
+
+// CHECK: vgatherdps %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x39,0x92,0x14,0x4f]
+          vgatherdps %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x39,0x93,0x14,0x4f]
+          vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vgatherdps %ymm8, (%r15,%ymm9,2), %ymm10
+// CHECK: encoding: [0xc4,0x02,0x3d,0x92,0x14,0x4f]
+          vgatherdps %ymm8, (%r15,%ymm9,2), %ymm10
+
+// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x3d,0x93,0x14,0x4f]
+          vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
+
+// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xf9,0x90,0x14,0x4f]
+          vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vpgatherqq %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xf9,0x91,0x14,0x4f]
+          vpgatherqq %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xfd,0x90,0x14,0x4f]
+          vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+
+// CHECK: vpgatherqq %ymm0, (%rdi,%ymm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xfd,0x91,0x14,0x4f]
+          vpgatherqq %ymm0, (%rdi,%ymm1,2), %ymm2
+
+// CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x39,0x90,0x14,0x4f]
+          vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x39,0x91,0x14,0x4f]
+          vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vpgatherdd %ymm8, (%r15,%ymm9,2), %ymm10
+// CHECK: encoding: [0xc4,0x02,0x3d,0x90,0x14,0x4f]
+          vpgatherdd %ymm8, (%r15,%ymm9,2), %ymm10
+
+// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x3d,0x91,0x14,0x4f]
+          vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
diff --git a/test/MC/X86/x86_64-sse4a.s b/test/MC/X86/x86_64-sse4a.s
new file mode 100644
index 0000000..e5ed69e
--- /dev/null
+++ b/test/MC/X86/x86_64-sse4a.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+extrq  $2, $3, %xmm0
+# CHECK: extrq  $2, $3, %xmm0
+# CHECK: encoding: [0x66,0x0f,0x78,0xc0,0x03,0x02]
+
+extrq  %xmm1, %xmm0
+# CHECK: extrq  %xmm1, %xmm0
+# CHECK: encoding: [0x66,0x0f,0x79,0xc1]
+
+insertq $6, $5, %xmm1, %xmm0
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+# CHECK: encoding: [0xf2,0x0f,0x78,0xc1,0x05,0x06]
+
+insertq %xmm1, %xmm0
+# CHECK: insertq %xmm1, %xmm0
+# CHECK: encoding: [0xf2,0x0f,0x79,0xc1]
+
+movntsd %xmm0, (%rdi)
+# CHECK: movntsd %xmm0, (%rdi)
+# CHECK: encoding: [0xf2,0x0f,0x2b,0x07]
+
+movntss %xmm0, (%rdi)
+# CHECK: movntss %xmm0, (%rdi)
+# CHECK: encoding: [0xf3,0x0f,0x2b,0x07]
diff --git a/test/Makefile b/test/Makefile
index a4e53f8..9ddfabf 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -12,9 +12,6 @@ DIRS  =
 
 all:: check-local
 
-# 'lit' is the default test runner.
-check-local:: check-local-lit
-
 # Include other test rules
 include Makefile.tests
 
@@ -27,7 +24,6 @@ $(warning GREP_OPTIONS environment variable may interfere with test results)
 endif
 
 ifdef VERBOSE
-RUNTESTFLAGS := $(VERBOSE)
 LIT_ARGS := -v
 else
 LIT_ARGS := -s -v
@@ -42,7 +38,6 @@ ifdef TESTSUITE
 LIT_TESTSUITE := $(TESTSUITE)
 CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE))
 CLEANED_TESTSUITE := $(patsubst test/%,%,$(CLEANED_TESTSUITE))
-RUNTESTFLAGS += --tool $(CLEANED_TESTSUITE)
 else
 LIT_TESTSUITE := .
 endif
@@ -54,8 +49,8 @@ endif
 # Check what to run for -all.
 LIT_ALL_TESTSUITES := $(LIT_TESTSUITE)
 
-extra-lit-site-cfgs::
-.PHONY: extra-lit-site-cfgs
+extra-site-cfgs::
+.PHONY: extra-site-cfgs
 
 ifneq ($(strip $(filter check-local-all,$(MAKECMDGOALS))),)
 ifndef TESTSUITE
@@ -63,21 +58,20 @@ ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/Makefile && echo OK), OK)
 LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test
 
 # Force creation of Clang's lit.site.cfg.
-clang-lit-site-cfg: FORCE
+clang-site-cfg: FORCE
 	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg Unit/lit.site.cfg
-extra-lit-site-cfgs:: clang-lit-site-cfg
-endif
-endif
+extra-site-cfgs:: clang-site-cfg
 endif
 
-IGNORE_TESTS :=
+ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/Makefile && echo OK), OK)
+LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test
 
-ifndef RUNLLVM2CPP
-IGNORE_TESTS += llvm2cpp.exp
+# Force creation of Clang Tools' lit.site.cfg.
+clang-tools-site-cfg: FORCE
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg
+extra-site-cfgs:: clang-tools-site-cfg
+endif
 endif
-
-ifdef IGNORE_TESTS
-RUNTESTFLAGS += --ignore "$(strip $(IGNORE_TESTS))"
 endif
 
 # ulimits like these are redundantly enforced by the buildbots, so
@@ -94,21 +88,14 @@ ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ;
 endif # AuroraUX
 endif # SunOS
 
-ifneq ($(RUNTEST),)
-check-local-dg:: site.exp
-	( $(ULIMIT) \
-	  PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \
-	  $(RUNTEST) $(RUNTESTFLAGS) )
-else
-check-local-dg:: site.exp
-	@echo "*** dejagnu not found.  Make sure 'runtest' is in your PATH, then reconfigure LLVM."
-endif
-
-check-local-lit:: lit.site.cfg Unit/lit.site.cfg
+check-local:: lit.site.cfg Unit/lit.site.cfg
 	( $(ULIMIT) \
 	  $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
 
-check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs
+# This is a legacy alias dating from when both DejaGNU and lit were in use.
+check-local-lit:: check-local
+
+check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
 	( $(ULIMIT) \
 	  $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
 
@@ -129,48 +116,28 @@ endif
 
 FORCE:
 
-site.exp: FORCE
-	@echo 'Making a new site.exp file...'
-	@echo '## Autogenerated by LLVM configuration.' > site.tmp
-	@echo '# Do not edit!' >> site.tmp
-	@echo 'set target_triplet "$(TARGET_TRIPLE)"' >> site.tmp
-	@echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp
-	@echo 'set llvmshlibdir "$(SharedLibDir)"' >>site.tmp
-	@echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp
-	@echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp
-	@echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp
-	@echo 'set srcdir "$(LLVM_SRC_ROOT)/test"' >>site.tmp
-	@echo 'set objdir "$(LLVM_OBJ_ROOT)/test"' >>site.tmp
-	@echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp
-	@echo 'set shlibext "$(SHLIBEXT)"' >> site.tmp
-	@echo 'set ocamlopt "$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml"' >> site.tmp
-	@echo 'set valgrind "$(VALGRIND)"' >> site.tmp
-	@echo 'set grep "$(GREP)"' >>site.tmp
-	@echo 'set gas "$(GAS)"' >>site.tmp
-	@echo '## All variables above are generated by configure. Do Not Edit ## ' >>site.tmp
-	@test ! -f site.exp || \
-	sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp
-	@-rm -f site.bak
-	@test ! -f site.exp || mv site.exp site.bak
-	@mv site.tmp site.exp
-
 ifeq ($(DISABLE_ASSERTIONS),1)
 ENABLE_ASSERTIONS=0
 else
 ENABLE_ASSERTIONS=1
 endif
 
-lit.site.cfg: site.exp
+lit.site.cfg: FORCE
 	@echo "Making LLVM 'lit.site.cfg' file..."
-	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp
+	@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp
+	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
-	@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp
+	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
+	@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
+	@$(ECHOPATH) s,@OCAMLOPT@,$(OCAMLOPT) -cc \\\\\"$(CXX_FOR_OCAMLOPT)\\\\\" -I $(LibDir)/ocaml,g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
 	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
+	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp
+	@$(ECHOPATH) s=@HOST_ARCH@=$(HOST_ARCH)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
@@ -179,10 +146,11 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
 	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > unit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> unit.tmp
 	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> unit.tmp
-	@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> unit.tmp
 	@$(ECHOPATH) s=@LLVM_BUILD_MODE@=$(BuildMode)=g >> unit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> unit.tmp
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> unit.tmp
 	@$(ECHOPATH) s=@SHLIBPATH_VAR@=$(SHLIBPATH_VAR)=g >> unit.tmp
+	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> unit.tmp
+	@$(ECHOPATH) s=@HOST_ARCH@=$(HOST_ARCH)=g >> lit.tmp
 	@sed -f unit.tmp $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
 	@-rm -f unit.tmp
diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml
new file mode 100644
index 0000000..ca90222
--- /dev/null
+++ b/test/Object/Inputs/COFF/i386.yaml
@@ -0,0 +1,83 @@
+header: !Header
+  Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+
+sections:
+  - !Section
+    Name: .text
+    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+    SectionData:  !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....|
+
+    Relocations:
+      - !Relocation
+        VirtualAddress: 0xe
+        SymbolTableIndex: 5
+        Type: IMAGE_REL_I386_DIR32
+
+      - !Relocation
+        VirtualAddress: 0x13
+        SymbolTableIndex: 6
+        Type: IMAGE_REL_I386_REL32
+
+      - !Relocation
+        VirtualAddress: 0x18
+        SymbolTableIndex: 7
+        Type: IMAGE_REL_I386_REL32
+
+  - !Section
+    Name: .data
+    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+    SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+symbols:
+  - !Symbol
+    Name: .text
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+    NumberOfAuxSymbols: 1
+    AuxillaryData:  !hex "240000000300000000000000010000000000" # |$.................|
+
+  - !Symbol
+    Name: .data
+    Value: 0
+    SectionNumber: 2
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+    NumberOfAuxSymbols: 1
+    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+
+  - !Symbol
+    Name: _main
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+  - !Symbol
+    Name: L_.str
+    Value: 0
+    SectionNumber: 2
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+  - !Symbol
+    Name: _puts
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+  - !Symbol
+    Name: _SomeOtherFunction
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
diff --git a/test/Object/Inputs/COFF/x86-64.yaml b/test/Object/Inputs/COFF/x86-64.yaml
new file mode 100644
index 0000000..0b1265f
--- /dev/null
+++ b/test/Object/Inputs/COFF/x86-64.yaml
@@ -0,0 +1,83 @@
+header: !Header
+  Machine: IMAGE_FILE_MACHINE_AMD64 # (0x8664)
+
+sections:
+  - !Section
+    Name: .text
+    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+    SectionData:  !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.|
+
+    Relocations:
+      - !Relocation
+        VirtualAddress: 0xf
+        SymbolTableIndex: 5
+        Type: IMAGE_REL_AMD64_REL32
+
+      - !Relocation
+        VirtualAddress: 0x14
+        SymbolTableIndex: 6
+        Type: IMAGE_REL_AMD64_REL32
+
+      - !Relocation
+        VirtualAddress: 0x19
+        SymbolTableIndex: 7
+        Type: IMAGE_REL_AMD64_REL32
+
+  - !Section
+    Name: .data
+    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+    SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+symbols:
+  - !Symbol
+    Name: .text
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+    NumberOfAuxSymbols: 1
+    AuxillaryData:  !hex "260000000300000000000000010000000000" # |&.................|
+
+  - !Symbol
+    Name: .data
+    Value: 0
+    SectionNumber: 2
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+    NumberOfAuxSymbols: 1
+    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+
+  - !Symbol
+    Name: main
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+  - !Symbol
+    Name: L.str
+    Value: 0
+    SectionNumber: 2
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+  - !Symbol
+    Name: puts
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+  - !Symbol
+    Name: SomeOtherFunction
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
diff --git a/test/Object/Inputs/trivial-object-test.coff-i386 b/test/Object/Inputs/trivial-object-test.coff-i386
index 8cfd994..d4ab63b 100644
--- a/test/Object/Inputs/trivial-object-test.coff-i386
+++ b/test/Object/Inputs/trivial-object-test.coff-i386
diff --git a/test/Object/Inputs/trivial-object-test.elf-hexagon b/test/Object/Inputs/trivial-object-test.elf-hexagon
new file mode 100644
index 0000000..566fa30
--- /dev/null
+++ b/test/Object/Inputs/trivial-object-test.elf-hexagon
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index e5635ab..8fd1c04 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,7 +1,7 @@
-RUN: llvm-nm %p/Inputs/trivial-object-test.coff-i386 \
-RUN:         | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/Inputs/trivial-object-test.coff-x86-64 \
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm \
 RUN:         | FileCheck %s -check-prefix COFF
+RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm \
+RUN          | FileCheck %s -check-prefix COFF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
@@ -30,4 +30,4 @@ macho: 00000000 U _puts
 macho64: 00000028 s L_.str
 macho64: 00000000 u _SomeOtherFunction
 macho64: 00000000 s _main
-macho64: 00000000 u _puts
-\ No newline at end of file
+macho64: 00000000 u _puts
diff --git a/test/Object/objdump-file-header.test b/test/Object/objdump-file-header.test
index 3fce3f4..a552113 100644
--- a/test/Object/objdump-file-header.test
+++ b/test/Object/objdump-file-header.test
@@ -1,5 +1,4 @@
-RUN: llvm-objdump -f %p/Inputs/trivial-object-test.coff-i386 \
-RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -f - | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -f %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
 
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index c4b564e..a394a23 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -6,6 +6,8 @@ RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
 RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \
 RUN:              | FileCheck %s -check-prefix ELF-x86-64
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \
+RUN:              | FileCheck %s -check-prefix ELF-hexagon
 
 COFF-i386: .text
 COFF-i386: IMAGE_REL_I386_DIR32 L_.str
@@ -26,3 +28,11 @@ ELF-x86-64: .text
 ELF-x86-64: R_X86_64_32S .rodata.str1.1
 ELF-x86-64: R_X86_64_PC32 puts
 ELF-x86-64: R_X86_64_PC32 SomeOtherFunction
+
+ELF-hexagon: .text
+ELF-hexagon: R_HEX_GOTREL_HI16 .main
+ELF-hexagon: R_HEX_GOTREL_LO16 .main
+ELF-hexagon: R_HEX_HI16 puts
+ELF-hexagon: R_HEX_LO16 puts
+ELF-hexagon: R_HEX_B15_PCREL testf
+ELF-hexagon: R_HEX_B22_PCREL puts
diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test
index 581e75e..f9c4f43 100644
--- a/test/Object/objdump-section-content.test
+++ b/test/Object/objdump-section-content.test
@@ -1,9 +1,8 @@
-RUN: llvm-objdump -s %p/Inputs/trivial-object-test.coff-i386 \
-RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -s - | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
 
-COFF-i386: trivial-object-test.coff-i386:     file format
+COFF-i386: file format
 COFF-i386: Contents of section .text:
 COFF-i386:  0000 83ec0cc7 44240800 000000c7 04240000  ....D$.......$..
 COFF-i386:  0010 0000e800 000000e8 00000000 8b442408  .............D$.
diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test
index 8a0f440..989ec04 100644
--- a/test/Object/objdump-symbol-table.test
+++ b/test/Object/objdump-symbol-table.test
@@ -1,17 +1,17 @@
-RUN: llvm-objdump -t %p/Inputs/trivial-object-test.coff-i386 \
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -t - \
 RUN:              | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -t %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
 RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \
 RUN:              | FileCheck %s -check-prefix macho-i386
 
-COFF-i386: trivial-object-test.coff-i386:     file format
+COFF-i386: file format
 COFF-i386: SYMBOL TABLE:
 COFF-i386: [  0](sec  1)(fl 0x00)(ty   0)(scl   3) (nx 1) 0x00000000 .text
 COFF-i386: AUX scnlen 0x24 nreloc 3 nlnno 0 checksum 0x0 assoc 1 comdat 0
 COFF-i386: [  2](sec  2)(fl 0x00)(ty   0)(scl   3) (nx 1) 0x00000000 .data
 COFF-i386: AUX scnlen 0xd nreloc 0 nlnno 0 checksum 0x0 assoc 2 comdat 0
-COFF-i386: [  4](sec  1)(fl 0x00)(ty 200)(scl   2) (nx 0) 0x00000000 _main
+COFF-i386: [  4](sec  1)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 _main
 COFF-i386: [  5](sec  2)(fl 0x00)(ty   0)(scl   3) (nx 0) 0x00000000 L_.str
 COFF-i386: [  6](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 _puts
 COFF-i386: [  7](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 _SomeOtherFunction
@@ -30,4 +30,4 @@ macho-i386: trivial-object-test.macho-i386:        file format Mach-O 32-bit i38
 macho-i386: SYMBOL TABLE:
 macho-i386: 00000000 g     F __TEXT,__text  00000024 _main
 macho-i386: 00000000         *UND*  00000000 _SomeOtherFunction
-macho-i386: 00000000         *UND*  00000000 _puts
-\ No newline at end of file
+macho-i386: 00000000         *UND*  00000000 _puts
diff --git a/test/Other/2003-02-19-LoopInfoNestingBug.ll b/test/Other/2003-02-19-LoopInfoNestingBug.ll
index 13f8351..b807c44 100644
--- a/test/Other/2003-02-19-LoopInfoNestingBug.ll
+++ b/test/Other/2003-02-19-LoopInfoNestingBug.ll
@@ -3,7 +3,7 @@
 ; and instead nests it just inside loop "Top"
 ;
 ; RUN: opt < %s -analyze -loops | \
-; RUN:   grep {     Loop at depth 3 containing: %Inner<header><latch><exiting>}
+; RUN:   grep "     Loop at depth 3 containing: %Inner<header><latch><exiting>"
 ;
 define void @test() {
         br label %Top
diff --git a/test/Other/2008-10-15-MissingSpace.ll b/test/Other/2008-10-15-MissingSpace.ll
index d16ea72..cac696e 100644
--- a/test/Other/2008-10-15-MissingSpace.ll
+++ b/test/Other/2008-10-15-MissingSpace.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | not grep {void@}
+; RUN: llvm-as < %s | llvm-dis | not grep "void@"
 ; PR2894
 declare void @g()
 define void @f() {
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
index 40a01cc..1d207c7 100644
--- a/test/Other/close-stderr.ll
+++ b/test/Other/close-stderr.ll
@@ -1,7 +1,5 @@
-; RUN: sh -c "\
-; RUN:        opt --reject-this-option 2>&-; echo \$?; \
-; RUN:        opt -o /dev/null /dev/null 2>&-; echo \$?; \
-; RUN:       " | FileCheck %s
+; RUN: sh -c 'opt --reject-this-option 2>&-; echo $?; opt -o /dev/null /dev/null 2>&-; echo $?;' \
+; RUN:   | FileCheck %s
 ; CHECK: {{^1$}}
 ; CHECK: {{^0$}}
 ; XFAIL: vg_leak
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index d28c178..eafb16e 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -263,10 +263,10 @@ define i1* @hoo1() nounwind {
 ; OPT:   ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
 ; OPT: }
 ; OPT: define i64 @fc() nounwind {
-; OPT:   ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
+; OPT:   ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
 ; OPT: }
 ; OPT: define i64 @fd() nounwind {
-; OPT:   ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
+; OPT:   ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
 ; OPT: }
 ; OPT: define i64 @fe() nounwind {
 ; OPT:   ret i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
@@ -433,7 +433,7 @@ define i64* @fO() nounwind {
 ; PLAIN:   ret i32* %t
 ; PLAIN: }
 ; OPT: define i32* @fZ() nounwind {
-; OPT:   ret i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT:   ret i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
 ; OPT: }
 ; TO: define i32* @fZ() nounwind {
 ; TO:   ret i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
diff --git a/test/Other/invalid-commandline-option.ll b/test/Other/invalid-commandline-option.ll
index 60840fa..583d449 100644
--- a/test/Other/invalid-commandline-option.ll
+++ b/test/Other/invalid-commandline-option.ll
@@ -1,3 +1,3 @@
-; RUN: not opt --foo |& grep {Unknown command line argument}
+; RUN: not opt --foo 2>&1 | grep "Unknown command line argument"
 
 ; there is no --foo
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index ca2b1a3..c84f56f 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -lint -disable-output < %s |& FileCheck %s
+; RUN: opt -basicaa -lint -disable-output < %s 2>&1 | FileCheck %s
 target datalayout = "e-p:64:64:64"
 
 declare fastcc void @bar()
diff --git a/test/Other/optimize-options.ll b/test/Other/optimize-options.ll
new file mode 100644
index 0000000..888a78f
--- /dev/null
+++ b/test/Other/optimize-options.ll
@@ -0,0 +1,8 @@
+;RUN: opt -S -O1 -debug-pass=Arguments 2>&1 | FileCheck %s
+;RUN: opt -S -O2 -debug-pass=Arguments 2>&1 | FileCheck %s
+;RUN: opt -S -Os -debug-pass=Arguments 2>&1 | FileCheck %s
+;RUN: opt -S -Oz -debug-pass=Arguments 2>&1 | FileCheck %s
+;RUN: opt -S -O3 -debug-pass=Arguments 2>&1 | FileCheck %s
+
+; Just check that we get a non-empty set of passes for each -O opton.
+;CHECK: Pass Arguments: {{.*}} -print-module
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
index 58ca177..69cdacd 100755
--- a/test/Scripts/elf-dump
+++ b/test/Scripts/elf-dump
@@ -15,6 +15,7 @@ class Reader:
             self.file = open(path, "rb")
         self.isLSB = None
         self.is64Bit = None
+        self.isN64 = False
 
     def seek(self, pos):
         self.file.seek(pos)
@@ -122,15 +123,28 @@ def dumpRel(f, section, dumprela = False):
         f.seek(section.sh_offset[0] + index * section.sh_entsize[0])
         print "    # Relocation %s" % index
         print "    (('r_offset', %s)" % common_dump.HexDump(f.readWord())
-        r_info = f.readWord()[0]
-        if f.is64Bit:
-            r_sym = (r_info >> 32, 32)
-            r_type = (r_info & 0xffffffff, 32)
+
+        if f.isN64:
+            r_sym =   f.read32()
+            r_ssym =  f.read8()
+            r_type3 = f.read8()
+            r_type2 = f.read8()
+            r_type =  f.read8()
+            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
+            print "     ('r_ssym', %s)" % common_dump.HexDump(r_ssym)
+            print "     ('r_type3', %s)" % common_dump.HexDump(r_type3)
+            print "     ('r_type2', %s)" % common_dump.HexDump(r_type2)
+            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
         else:
-            r_sym = (r_info >> 8, 24)
-            r_type = (r_info & 0xff, 8)
-        print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
-        print "     ('r_type', %s)" % common_dump.HexDump(r_type)
+            r_info = f.readWord()[0]
+            if f.is64Bit:
+                r_sym = (r_info >> 32, 32)
+                r_type = (r_info & 0xffffffff, 32)
+            else:
+                r_sym = (r_info >> 8, 24)
+                r_type = (r_info & 0xff, 8)
+            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
+            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
         if dumprela:
             print "     ('r_addend', %s)" % common_dump.HexDump(f.readWord())
         print "    ),"
@@ -166,7 +180,13 @@ def dumpELF(path, opts):
     f.seek(16) # Seek to end of e_ident.
 
     print "('e_type', %s)" % common_dump.HexDump(f.read16())
-    print "('e_machine', %s)" % common_dump.HexDump(f.read16())
+
+    # Does any other architecture use N64?
+    e_machine = f.read16()
+    if e_machine[0] == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit
+        f.isN64 = True 
+    
+    print "('e_machine', %s)" % common_dump.HexDump(e_machine)
     print "('e_version', %s)" % common_dump.HexDump(f.read32())
     print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
     print "('e_phoff', %s)" % common_dump.HexDump(f.readWord())
diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td
index 47fd81d..46d3f62 100644
--- a/test/TableGen/DefmInherit.td
+++ b/test/TableGen/DefmInherit.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {zing = 4} | count 4
+// RUN: llvm-tblgen %s | grep "zing = 4" | count 4
 // XFAIL: vg_leak
 
 class C1<int A, string B> { 
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
index e2defe9..4aacc74 100644
--- a/test/TableGen/ForeachLoop.td
+++ b/test/TableGen/ForeachLoop.td
@@ -6,10 +6,19 @@ class Register<string name, int idx> {
   int Index = idx;
 }
 
+// CHECK-NOT: !strconcat
+
+foreach i = 0-3 in
+  def Q#i : Register<"Q"#i, i>;
+
+// CHECK: def Q0
+// CHECK: def Q1
+// CHECK: def Q2
+// CHECK: def Q3
+
 foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
   def R#i : Register<"R"#i, i>;
 
-
 // CHECK: def R0
 // CHECK: string Name = "R0";
 // CHECK: int Index = 0;
@@ -41,3 +50,14 @@ foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
 // CHECK: def R7
 // CHECK: string Name = "R7";
 // CHECK: int Index = 7;
+
+foreach i = {0-3,9-7} in
+  def S#i : Register<"Q"#i, i>;
+
+// CHECK: def S0
+// CHECK: def S1
+// CHECK: def S2
+// CHECK: def S3
+// CHECK: def S7
+// CHECK: def S8
+// CHECK: def S9
diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td
index 8145a3f..306959e 100644
--- a/test/TableGen/LazyChange.td
+++ b/test/TableGen/LazyChange.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {int Y = 3}
+// RUN: llvm-tblgen %s | grep "int Y = 3"
 // XFAIL: vg_leak
 
 class C {
diff --git a/test/TableGen/ListOfList.td b/test/TableGen/ListOfList.td
index 565a99c..864401e 100644
--- a/test/TableGen/ListOfList.td
+++ b/test/TableGen/ListOfList.td
@@ -1,6 +1,6 @@
 // RUN llvm-tblgen %s | FileCheck %s
 
-// RUN: llvm-tblgen %s | grep {foo} | count 1
+// RUN: llvm-tblgen %s | grep "foo" | count 1
 // XFAIL: vg_leak
 
 class Base<string t> {
diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td
index 04f3a56..449c5d6 100644
--- a/test/TableGen/MultiClass.td
+++ b/test/TableGen/MultiClass.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {zing = 4} | count 2
+// RUN: llvm-tblgen %s | grep "zing = 4" | count 2
 // XFAIL: vg_leak
 
 class C1<int A, string B> { 
diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td
index 8b78bc7..c768fff 100644
--- a/test/TableGen/MultiClassInherit.td
+++ b/test/TableGen/MultiClassInherit.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {zing = 4} | count 28
+// RUN: llvm-tblgen %s | grep "zing = 4" | count 28
 // XFAIL: vg_leak
 
 class C1<int A, string B> { 
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index 4d85aa3..7613323 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -161,10 +161,12 @@ def S9a : Set<(sequence "e%u", 3, 7)>;
 def S9b : Set<(sequence "e%u", 7, 3)>;
 def S9c : Set<(sequence "e%u", 0, 0)>;
 def S9d : Set<(sequence "S%ua", 7, 9)>;
+def S9e : Set<(sequence "e%u", 3, 6, 2)>;
 // CHECK: S9a = [ e3 e4 e5 e6 e7 ]
 // CHECK: S9b = [ e7 e6 e5 e4 e3 ]
 // CHECK: S9c = [ e0 ]
 // CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ]
+// CHECK: S9e = [ e3 e5 ]
 
 // The 'interleave' operator is almost the inverse of 'decimate'.
 def interleave;
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index 2d2822c..6d051d7 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -1,5 +1,5 @@
-// RUN: llvm-tblgen %s | grep {\\\[(set} | count 2
-// RUN: llvm-tblgen %s | grep {\\\[\\\]} | count 2
+// RUN: llvm-tblgen %s | grep "\[(set" | count 2
+// RUN: llvm-tblgen %s | grep "\[\]" | count 2
 // XFAIL: vg_leak
 
 class ValueType<int size, int value> {
diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td
index 7b611e7..64b706d 100644
--- a/test/TableGen/TargetInstrSpec.td
+++ b/test/TableGen/TargetInstrSpec.td
@@ -1,5 +1,5 @@
-// RUN: llvm-tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_pd VR128:\$src1, VR128:\$src2))\\\]} | count 1
-// RUN: llvm-tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_ps VR128:\$src1, VR128:\$src2))\\\]} | count 1
+// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))\]' | count 1
+// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))\]' | count 1
 // XFAIL: vg_leak
 
 class ValueType<int size, int value> {
diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td
index 8a23eb4..7948aff 100644
--- a/test/TableGen/cast.td
+++ b/test/TableGen/cast.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {add_ps} | count 3
+// RUN: llvm-tblgen %s | grep "add_ps" | count 3
 // XFAIL: vg_leak
 
 class ValueType<int size, int value> {
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index 814ae6e..902af25 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,6 +1,6 @@
-// RUN: llvm-tblgen %s | grep {Jr} | count 2
-// RUN: llvm-tblgen %s | grep {Sr} | count 2
-// RUN: llvm-tblgen %s | grep {"NAME"} | count 1
+// RUN: llvm-tblgen %s | grep 'Jr' | count 2
+// RUN: llvm-tblgen %s | grep 'Sr' | count 2
+// RUN: llvm-tblgen %s | grep '"NAME"' | count 1
 // XFAIL: vg_leak
 
 // Variables for foreach
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index 025aca9..dd85ddc 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s | grep {}
+// RUN: llvm-tblgen %s | grep ""
 // XFAIL: vg_leak
 
 class List<list<string> n> {
diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td
index 5a73ec4..850ac38 100644
--- a/test/TableGen/subst.td
+++ b/test/TableGen/subst.td
@@ -1,9 +1,9 @@
-// RUN: llvm-tblgen %s | grep {Smith} | count 7
-// RUN: llvm-tblgen %s | grep {Johnson} | count 2
-// RUN: llvm-tblgen %s | grep {FIRST} | count 1
-// RUN: llvm-tblgen %s | grep {LAST} | count 1
-// RUN: llvm-tblgen %s | grep {TVAR} | count 2
-// RUN: llvm-tblgen %s | grep {Bogus} | count 1
+// RUN: llvm-tblgen %s | grep "Smith" | count 7
+// RUN: llvm-tblgen %s | grep "Johnson" | count 2
+// RUN: llvm-tblgen %s | grep "FIRST" | count 1
+// RUN: llvm-tblgen %s | grep "LAST" | count 1
+// RUN: llvm-tblgen %s | grep "TVAR" | count 2
+// RUN: llvm-tblgen %s | grep "Bogus" | count 1
 // XFAIL: vg_leak
 
 class Honorific<string t> {
diff --git a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
index d7d5eb5..210eb97 100644
--- a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
+++ b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -argpromotion -S > %t
-; RUN: cat %t | grep {define.*@callee(.*i32\\*}
+; RUN: cat %t | grep "define.*@callee(.*i32\*"
 ; PR2498
 
 ; This test tries to convince argpromotion about promoting the load from %A + 2,
diff --git a/test/Transforms/ArgumentPromotion/byval-2.ll b/test/Transforms/ArgumentPromotion/byval-2.ll
index bd62c68..368c689 100644
--- a/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -argpromotion -S | grep -F {i32* byval} | count 2
+; RUN: opt < %s -argpromotion -S | grep -F "i32* byval" | count 2
 ; Argpromote + scalarrepl should change this to passing the two integers by value.
 
 	%struct.ss = type { i32, i64 }
diff --git a/test/Transforms/ArgumentPromotion/control-flow.ll b/test/Transforms/ArgumentPromotion/control-flow.ll
index 08ca6bc..e4a61da 100644
--- a/test/Transforms/ArgumentPromotion/control-flow.ll
+++ b/test/Transforms/ArgumentPromotion/control-flow.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -argpromotion -S | \
-; RUN:    not grep {load i32\* null}
+; RUN:    not grep "load i32* null"
 
 define internal i32 @callee(i1 %C, i32* %P) {
         br i1 %C, label %T, label %F
diff --git a/test/Transforms/ArgumentPromotion/control-flow2.ll b/test/Transforms/ArgumentPromotion/control-flow2.ll
index 9a8afc3..2543218 100644
--- a/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -argpromotion -S | \
-; RUN:   grep {load i32\\* %A}
+; RUN:   grep "load i32\* %A"
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define internal i32 @callee(i1 %C, i32* %P) {
diff --git a/test/Transforms/BBVectorize/metadata.ll b/test/Transforms/BBVectorize/metadata.ll
new file mode 100644
index 0000000..1e3aaa1
--- /dev/null
+++ b/test/Transforms/BBVectorize/metadata.ll
@@ -0,0 +1,49 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s
+
+; Simple 3-pair chain with loads and stores (with fpmath)
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1, !fpmath !2
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4, !fpmath !3
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test1
+; CHECK: !fpmath
+; CHECK: ret void
+}
+
+; Simple 3-pair chain with loads and stores (ints with range)
+define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load i64* %a, align 8, !range !0
+  %i1 = load i64* %b, align 8
+  %mul = mul i64 %i0, %i1
+  %arrayidx3 = getelementptr inbounds i64* %a, i64 1
+  %i3 = load i64* %arrayidx3, align 8, !range !1
+  %arrayidx4 = getelementptr inbounds i64* %b, i64 1
+  %i4 = load i64* %arrayidx4, align 8
+  %mul5 = mul i64 %i3, %i4
+  store i64 %mul, i64* %c, align 8
+  %arrayidx5 = getelementptr inbounds i64* %c, i64 1
+  store i64 %mul5, i64* %arrayidx5, align 8
+  ret void
+; CHECK: @test2
+; CHECK-NOT: !range
+; CHECK: ret void
+}
+
+!0 = metadata !{i64 0, i64 2}
+!1 = metadata !{i64 3, i64 5}
+
+!2 = metadata !{ float 5.0 }
+!3 = metadata !{ float 2.5 }
+
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 4daa571..325792a 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -1,5 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
 
 ; Basic depth-3 chain with select
 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
@@ -27,4 +28,32 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1
 ; CHECK: ret double %R
 }
 
+; Basic depth-3 chain with select (and vect. compare)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test2
+; CHECK-NB: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%C1 = fcmp ogt double %X1, %A1
+        %C2 = fcmp ogt double %X2, %A2
+; CHECK: %C1 = fcmp ogt <2 x double> %X1, %X1.v.i0.2
+; CHECK-NB: fcmp ogt double
+        %Z1 = select i1 %C1, double %Y1, double %B1
+        %Z2 = select i1 %C2, double %Y2, double %B2
+; CHECK: %Z1 = select <2 x i1> %C1, <2 x double> %Y1, <2 x double> %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
 
diff --git a/test/Transforms/BBVectorize/simple-tst.ll b/test/Transforms/BBVectorize/simple-tst.ll
new file mode 100644
index 0000000..42146c6
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-tst.ll
@@ -0,0 +1,18 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=256 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain (target-specific type should not vectorize)
+define ppc_fp128 @test7(ppc_fp128 %A1, ppc_fp128 %A2, ppc_fp128 %B1, ppc_fp128 %B2) {
+; CHECK: @test7
+; CHECK-NOT: <2 x ppc_fp128>
+	%X1 = fsub ppc_fp128 %A1, %B1
+	%X2 = fsub ppc_fp128 %A2, %B2
+	%Y1 = fmul ppc_fp128 %X1, %A1
+	%Y2 = fmul ppc_fp128 %X2, %A2
+	%Z1 = fadd ppc_fp128 %Y1, %B1
+	%Z2 = fadd ppc_fp128 %Y2, %B2
+	%R  = fmul ppc_fp128 %Z1, %Z2
+	ret ppc_fp128 %R
+}
+
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index 904d766..88eb9c9 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -138,8 +138,7 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
 ; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
         %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
         %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
-; CHECK: %Z1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15>
-; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
 	%R  = mul <8 x i8> %Q1, %Q2
 ; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
new file mode 100644
index 0000000..153be73
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -0,0 +1,35 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1
+; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2
+; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1
+; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%X3 = fsub double %A3, %B3
+; CHECK: %X1 = fsub <3 x double> %X1.v.i0, %X1.v.i1
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%Y3 = fmul double %X3, %A3
+; CHECK: %Y1 = fmul <3 x double> %X1, %X1.v.i0
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%Z3 = fadd double %Y3, %B3
+; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1
+        %R1 = fmul double %Z1, %Z2
+	%R  = fmul double %R1, %Z3
+; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2
+; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1
+; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: %R = fmul double %R1, %Z1.v.r210
+	ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index ebf10f0..c68e77e 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 ; CHECK: @test1
 ; objectsize should fold to a constant, which causes the branch to fold to an
-; uncond branch.
+; uncond branch. Next, we fold the control flow alltogether.
 ; rdar://8785296
 define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
 entry:
@@ -13,8 +13,8 @@ entry:
   %1 = icmp ugt i64 %0, 3
   br i1 %1, label %T, label %trap
 
-; CHECK: entry:
-; CHECK-NEXT: br label %T
+; CHECK: T:
+; CHECK-NOT: br label %
 
 trap:                                             ; preds = %0, %entry
   tail call void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
index b957220..ca1d618 100644
--- a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
+++ b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
@@ -5,7 +5,7 @@
 ; Fix #2: The unary not instruction now no longer exists. Change to xor.
 
 ; RUN: opt < %s -constprop -S | \
-; RUN:   not grep {i32 0}
+; RUN:   not grep "i32 0"
 
 define i32 @test1() {
         %R = xor i32 123, -1            ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
index 0b44b99..d68cb26 100644
--- a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
+++ b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -constprop -S | \
-; RUN:    not grep {ret i1 false}
+; RUN:    not grep "ret i1 false"
 
 @b = external global [2 x {  }]         ; <[2 x {  }]*> [#uses=2]
 
diff --git a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
index be76783..4a93144 100644
--- a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
+++ b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -constprop -S | \
-; RUN:   grep {i32 -1}
+; RUN:   grep "i32 -1"
 ; RUN: opt < %s -constprop -S | \
 ; RUN:   not grep zeroinitializer
 
diff --git a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
index e46a875..ce66c70 100644
--- a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
+++ b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {ret i1 false}
+; RUN:   grep "ret i1 false"
 define i1 @test() {
         %X = trunc i32 320 to i1                ; <i1> [#uses=1]
         ret i1 %X
diff --git a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
index 3c06693..71db421 100644
--- a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
+++ b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -constprop -S | \
-; RUN:    grep {ret i32 -1}
+; RUN:    grep "ret i32 -1"
 ; RUN: opt < %s -constprop -S | \
-; RUN:    grep {ret i32 1}
+; RUN:    grep "ret i32 1"
 
 define i32 @test1() {
         %A = sext i1 true to i32                ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstProp/2007-02-23-sdiv.ll b/test/Transforms/ConstProp/2007-02-23-sdiv.ll
index 721199f..75f58b5 100644
--- a/test/Transforms/ConstProp/2007-02-23-sdiv.ll
+++ b/test/Transforms/ConstProp/2007-02-23-sdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis | grep {global i32 0}
+; RUN: llvm-as < %s | llvm-dis | grep "global i32 0"
 ; PR1215
 
 @G = global i32 sdiv (i32 0, i32 -1)
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index a28c9b0..6d34cb1 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -constprop -S | grep {ret i13 13}
+; RUN: opt < %s -constprop -S | grep "ret i13 13"
 ; PR1816
 declare i13 @llvm.cttz.i13(i13, i1)
 
diff --git a/test/Transforms/ConstProp/div-zero.ll b/test/Transforms/ConstProp/div-zero.ll
index f78a34f..a2c59d3 100644
--- a/test/Transforms/ConstProp/div-zero.ll
+++ b/test/Transforms/ConstProp/div-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -instcombine -S | grep "ret i32 0"
 ; PR4424
 declare void @ext()
 
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
index 9b70ed2..6750546 100644
--- a/test/Transforms/CorrelatedValuePropagation/range.ll
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -41,3 +41,127 @@ end:
 ; CHECK: then:
 ; CHECK-NEXT: br i1 false, label %end, label %else
 }
+
+; CHECK: @test3
+define i32 @test3(i32 %c) nounwind {
+  %cmp = icmp slt i32 %c, 2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  ret i32 1
+
+if.end:
+  %cmp1 = icmp slt i32 %c, 3
+  br i1 %cmp1, label %if.then2, label %if.end8
+
+; CHECK: if.then2
+if.then2:
+  %cmp2 = icmp eq i32 %c, 2
+; CHECK: br i1 true
+  br i1 %cmp2, label %if.then4, label %if.end6
+
+; CHECK: if.end6
+if.end6:
+  ret i32 2
+
+if.then4:
+  ret i32 3
+
+if.end8:
+  ret i32 4
+}
+
+; CHECK: @test4
+define i32 @test4(i32 %c) nounwind {
+  switch i32 %c, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 4, label %sw.bb
+  ]
+
+; CHECK: sw.bb
+sw.bb:
+  %cmp = icmp sge i32 %c, 1
+; CHECK: br i1 true
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  br label %return
+
+if.end:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 42, %sw.default ], [ 4, %if.then ], [ 9, %if.end ]
+  ret i32 %retval.0
+}
+
+; CHECK: @test5
+define i1 @test5(i32 %c) nounwind {
+  %cmp = icmp slt i32 %c, 5
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %cmp1 = icmp eq i32 %c, 4
+  br i1 %cmp1, label %if.end, label %if.end8
+
+if.end:
+  ret i1 true
+
+if.end8:
+  %cmp2 = icmp eq i32 %c, 3
+  %cmp3 = icmp eq i32 %c, 4
+  %cmp4 = icmp eq i32 %c, 6
+; CHECK: %or = or i1 false, false
+  %or = or i1 %cmp3, %cmp4
+; CHECK: ret i1 %cmp2
+  ret i1 %cmp2
+}
+
+; CHECK: @test6
+define i1 @test6(i32 %c) nounwind {
+  %cmp = icmp ule i32 %c, 7
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+; CHECK: icmp eq i32 %c, 6
+; CHECK: br i1
+  switch i32 %c, label %if.end [
+    i32 6, label %sw.bb
+    i32 8, label %sw.bb
+  ]
+
+if.end:
+  ret i1 true
+
+sw.bb:
+  %cmp2 = icmp eq i32 %c, 6
+; CHECK: ret i1 true
+  ret i1 %cmp2
+}
+
+; CHECK: @test7
+define i1 @test7(i32 %c) nounwind {
+entry:
+ switch i32 %c, label %sw.default [
+   i32 6, label %sw.bb
+   i32 7, label %sw.bb
+ ]
+
+sw.bb:
+ ret i1 true
+
+sw.default:
+ %cmp5 = icmp eq i32 %c, 5
+ %cmp6 = icmp eq i32 %c, 6
+ %cmp7 = icmp eq i32 %c, 7
+ %cmp8 = icmp eq i32 %c, 8
+; CHECK: %or = or i1 %cmp5, false
+ %or = or i1 %cmp5, %cmp6
+; CHECK: %or2 = or i1 false, %cmp8
+ %or2 = or i1 %cmp7, %cmp8
+ ret i1 false
+}
diff --git a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
index d5bd6c4..e5419f7 100644
--- a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
+++ b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -deadargelim -S | grep {@test(}
+; RUN: opt < %s -deadargelim -S | grep "@test("
 ; RUN: opt < %s -deadargelim -S | not grep dead
 
 define internal i32 @test(i32 %X, i32 %dead) {
diff --git a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
index d4edce9..cdd893f 100644
--- a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
+++ b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -deadargelim -S | not grep {ret i32 0}
+; RUN: opt < %s -deadargelim -S | not grep "ret i32 0"
 ; PR1735
 
 define internal i32 @test(i32 %A, ...) { 
diff --git a/test/Transforms/DeadArgElim/canon.ll b/test/Transforms/DeadArgElim/canon.ll
index 11cd482..79c15a0 100644
--- a/test/Transforms/DeadArgElim/canon.ll
+++ b/test/Transforms/DeadArgElim/canon.ll
@@ -1,9 +1,9 @@
 ; This test shows a few canonicalizations made by deadargelim
 ; RUN: opt < %s -deadargelim -S > %t
 ; This test should remove {} and replace it with void
-; RUN: cat %t | grep {define internal void @test}
+; RUN: cat %t | grep "define internal void @test"
 ; This test shouls replace the {i32} return value with just i32
-; RUN: cat %t | grep {define internal i32 @test2}
+; RUN: cat %t | grep "define internal i32 @test2"
 
 define internal {} @test() {
   ret {} undef
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index 4d6aae3..dc92dc9f 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -deadargelim -S > %t
-; RUN: grep {define internal zeroext i32 @test1() nounwind} %t
-; RUN: grep {define internal <{ i32, i32 }> @test2} %t
+; RUN: grep "define internal zeroext i32 @test1() nounwind" %t
+; RUN: grep "define internal <{ i32, i32 }> @test2" %t
 
 %Ty = type <{ i32, i32 }>
 
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 81eb5a8..7a8cdd5 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -164,7 +164,7 @@ define i32* @test13() {
 }
 
 declare noalias i8* @malloc(i32)
-
+declare noalias i8* @calloc(i32, i32)
 
 
 define void @test14(i32* %Q) {
@@ -258,3 +258,55 @@ define void @test20() {
 }
 ; CHECK: @test20
 ; CHECK-NEXT: ret void
+
+; CHECK: @test21
+define void @test21() {
+  %m = call i8* @calloc(i32 9, i32 7)
+  store i8 0, i8* %m
+; CHECK-NEXT: ret void
+  ret void
+}
+
+; CHECK: @test22(
+define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
+  %k.addr = alloca i32
+  %m.addr = alloca i32
+  %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
+  store i32 0, i32* %k.addr.m.addr, align 4
+; CHECK-NEXT: ret void
+  ret void
+}
+
+; PR13547
+; CHECK: @test23
+; CHECK: store i8 97
+; CHECK: store i8 0
+declare noalias i8* @strdup(i8* nocapture) nounwind
+define noalias i8* @test23() nounwind uwtable ssp {
+  %x = alloca [2 x i8], align 1
+  %arrayidx = getelementptr inbounds [2 x i8]* %x, i64 0, i64 0
+  store i8 97, i8* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [2 x i8]* %x, i64 0, i64 1
+  store i8 0, i8* %arrayidx1, align 1
+  %call = call i8* @strdup(i8* %arrayidx) nounwind
+  ret i8* %call
+}
+
+; Make sure same sized store to later element is deleted
+; CHECK: @test24
+; CHECK-NOT: store i32 0
+; CHECK-NOT: store i32 0
+; CHECK: store i32 %b
+; CHECK: store i32 %c
+; CHECK: ret void
+define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
+  %1 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 0
+  store i32 0, i32* %1, align 4
+  %2 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 1
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 0
+  store i32 %b, i32* %3, align 4
+  %4 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 1
+  store i32 %c, i32* %4, align 4
+  ret void
+}
diff --git a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
index 7ef5f06..f38c03a 100644
--- a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
+++ b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
-; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p}
+; RUN: opt < %s -functionattrs -S | not grep "nocapture *%%q"
+; RUN: opt < %s -functionattrs -S | grep "nocapture *%%p"
 
 define i32* @a(i32** %p) {
 	%tmp = load i32** %p
diff --git a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 9983374..7e9c982 100644
--- a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | not grep {tmp10 =}
+; RUN: opt < %s -basicaa -gvn -S | not grep "tmp10 ="
 
 	%struct.INT2 = type { i32, i32 }
 @blkshifts = external global %struct.INT2*		; <%struct.INT2**> [#uses=2]
diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index f2c0012..5018a07 100644
--- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {tmp47 = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep "tmp47 = phi i32 "
 
 	%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
 @debug = external constant i32		; <i32*> [#uses=0]
diff --git a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
index a570e35..13419d1 100644
--- a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
+++ b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | not grep {tmp701 =}
+; RUN: opt < %s -basicaa -gvn -S | not grep "tmp701 ="
 
 @img_width = external global i16		; <i16*> [#uses=2]
 
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index 407940b..4f07868 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {ret i8 \[%\]tmp3}
+; RUN: opt < %s -basicaa -gvn -S | grep "ret i8 [%]tmp3"
 ; PR2503
 
 @g_3 = external global i8		; <i8*> [#uses=2]
diff --git a/test/Transforms/GVN/2012-05-22-PreCrash.ll b/test/Transforms/GVN/2012-05-22-PreCrash.ll
new file mode 100644
index 0000000..b488dda
--- /dev/null
+++ b/test/Transforms/GVN/2012-05-22-PreCrash.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -gvn
+; PR12858
+
+define void @fn5(i16 signext %p1, i8 signext %p2) nounwind uwtable {
+entry:
+  br i1 undef, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %conv = sext i16 %p1 to i32
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %conv1 = sext i16 %p1 to i32
+  br i1 undef, label %if.then3, label %if.else4
+
+if.then3:                                         ; preds = %if.end
+  br label %if.end12
+
+if.else4:                                         ; preds = %if.end
+  %conv7 = sext i8 %p2 to i32
+  %cmp8 = icmp eq i32 %conv1, %conv7
+  br i1 %cmp8, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else4
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.then10, %if.else4, %if.then3
+  %conv13 = sext i8 %p2 to i32
+  ret void
+}
diff --git a/test/Transforms/GVN/basic.ll b/test/Transforms/GVN/basic.ll
index 1decafa..6f4aace 100644
--- a/test/Transforms/GVN/basic.ll
+++ b/test/Transforms/GVN/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep {%z2 =}
+; RUN: opt < %s -gvn -S | not grep "%z2 ="
 
 define i32 @main() {
 block1:
diff --git a/test/Transforms/GVN/calls-readonly.ll b/test/Transforms/GVN/calls-readonly.ll
index 97ec915..a477740 100644
--- a/test/Transforms/GVN/calls-readonly.ll
+++ b/test/Transforms/GVN/calls-readonly.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {call.*strlen} | count 1
+; RUN: opt < %s -basicaa -gvn -S | grep "call.*strlen" | count 1
 ; Should delete the second call to strlen even though the intervening strchr call exists.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll
new file mode 100644
index 0000000..8ab2854
--- /dev/null
+++ b/test/Transforms/GVN/fpmath.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -gvn -S -o - | FileCheck %s
+
+define double @test1(double %x, double %y) {
+; CHECK: @test1(double %x, double %y)
+; CHECK: %add1 = fadd double %x, %y
+; CHECK-NOT: fpmath
+; CHECK: %foo = fadd double %add1, %add1
+  %add1 = fadd double %x, %y, !fpmath !0
+  %add2 = fadd double %x, %y
+  %foo = fadd double %add1, %add2
+  ret double %foo
+}
+
+define double @test2(double %x, double %y) {
+; CHECK: @test2(double %x, double %y)
+; CHECK: %add1 = fadd double %x, %y, !fpmath !0
+; CHECK: %foo = fadd double %add1, %add1
+  %add1 = fadd double %x, %y, !fpmath !0
+  %add2 = fadd double %x, %y, !fpmath !0
+  %foo = fadd double %add1, %add2
+  ret double %foo
+}
+
+define double @test3(double %x, double %y) {
+; CHECK: @test3(double %x, double %y)
+; CHECK: %add1 = fadd double %x, %y, !fpmath !1
+; CHECK: %foo = fadd double %add1, %add1
+  %add1 = fadd double %x, %y, !fpmath !1
+  %add2 = fadd double %x, %y, !fpmath !0
+  %foo = fadd double %add1, %add2
+  ret double %foo
+}
+
+define double @test4(double %x, double %y) {
+; CHECK: @test4(double %x, double %y)
+; CHECK: %add1 = fadd double %x, %y, !fpmath !1
+; CHECK: %foo = fadd double %add1, %add1
+  %add1 = fadd double %x, %y, !fpmath !0
+  %add2 = fadd double %x, %y, !fpmath !1
+  %foo = fadd double %add1, %add2
+  ret double %foo
+}
+
+!0 = metadata !{ float 5.0 }
+!1 = metadata !{ float 2.5 }
diff --git a/test/Transforms/GVN/load-constant-mem.ll b/test/Transforms/GVN/load-constant-mem.ll
index 314c806..a7dacea 100644
--- a/test/Transforms/GVN/load-constant-mem.ll
+++ b/test/Transforms/GVN/load-constant-mem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep "ret i32 0"
 ; PR4189
 @G = external constant [4 x i32]
 
diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll
index 5f03984..1d0dadf 100644
--- a/test/Transforms/GVN/local-pre.ll
+++ b/test/Transforms/GVN/local-pre.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -enable-pre -S | grep {b.pre}
+; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre"
 
 define i32 @main(i32 %p) {
 block1:
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index dba9d81..afcb7fe 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -stats -disable-output |& grep {Number of loads deleted}
+; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted"
 ; rdar://7363102
 
 ; GVN should be able to eliminate load %tmp22.i, because it is redundant with
diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll
new file mode 100644
index 0000000..669da91
--- /dev/null
+++ b/test/Transforms/GVN/pr12979.ll
@@ -0,0 +1,79 @@
+; RUN: opt %s -gvn -S -o - | FileCheck %s
+
+define i32 @test1(i32 %x, i32 %y) {
+; CHECK: @test1(i32 %x, i32 %y)
+; CHECK: %add1 = add i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nsw i32 %x, %y
+  %add2 = add     i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test2(i32 %x, i32 %y) {
+; CHECK: @test2(i32 %x, i32 %y)
+; CHECK: %add1 = add i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nuw i32 %x, %y
+  %add2 = add     i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test3(i32 %x, i32 %y) {
+; CHECK: @test3(i32 %x, i32 %y)
+; CHECK: %add1 = add i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nuw nsw i32 %x, %y
+  %add2 = add     i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test4(i32 %x, i32 %y) {
+; CHECK: @test4(i32 %x, i32 %y)
+; CHECK: %add1 = add nsw i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nsw i32 %x, %y
+  %add2 = add nsw i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test5(i32 %x, i32 %y) {
+; CHECK: @test5(i32 %x, i32 %y)
+; CHECK: %add1 = add i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nuw i32 %x, %y
+  %add2 = add nsw i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test6(i32 %x, i32 %y) {
+; CHECK: @test6(i32 %x, i32 %y)
+; CHECK: %add1 = add nsw i32 %x, %y
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add nuw nsw i32 %x, %y
+  %add2 = add nsw i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
+
+define i32 @test7(i32 %x, i32 %y) {
+; CHECK: @test7(i32 %x, i32 %y)
+; CHECK: %add1 = add i32 %x, %y
+; CHECK-NOT: what_is_this
+; CHECK: %foo = add i32 %add1, %add1
+
+  %add1 = add i32 %x, %y, !what_is_this !{}
+  %add2 = add i32 %x, %y
+  %foo = add i32 %add1, %add2
+  ret i32 %foo
+}
diff --git a/test/Transforms/GVN/pre-basic-add.ll b/test/Transforms/GVN/pre-basic-add.ll
index c13099f..4bde05c 100644
--- a/test/Transforms/GVN/pre-basic-add.ll
+++ b/test/Transforms/GVN/pre-basic-add.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -enable-pre -S | grep {.pre}
+; RUN: opt < %s -gvn -enable-pre -S | grep ".pre"
 
 @H = common global i32 0		; <i32*> [#uses=2]
 @G = common global i32 0		; <i32*> [#uses=1]
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
new file mode 100644
index 0000000..3759c41
--- /dev/null
+++ b/test/Transforms/GVN/range.ll
@@ -0,0 +1,101 @@
+; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+
+define i32 @test1(i32* %p) {
+; CHECK: @test1(i32* %p)
+; CHECK: %a = load i32* %p, !range !0
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !0
+  %b = load i32* %p, !range !0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test2(i32* %p) {
+; CHECK: @test2(i32* %p)
+; CHECK: %a = load i32* %p
+; CHECK-NOT: range
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !0
+  %b = load i32* %p
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test3(i32* %p) {
+; CHECK: @test3(i32* %p)
+; CHECK: %a = load i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]]
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !0
+  %b = load i32* %p, !range !1
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test4(i32* %p) {
+; CHECK: @test4(i32* %p)
+; CHECK: %a = load i32* %p, !range ![[MERGED_RANGE:[0-9]+]]
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !0
+  %b = load i32* %p, !range !2
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test5(i32* %p) {
+; CHECK: @test5(i32* %p)
+; CHECK: %a = load i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]]
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !3
+  %b = load i32* %p, !range !4
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test6(i32* %p) {
+; CHECK: @test6(i32* %p)
+; CHECK: %a = load i32* %p, !range ![[MERGED_TEST6:[0-9]+]]
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !5
+  %b = load i32* %p, !range !6
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test7(i32* %p) {
+; CHECK: @test7(i32* %p)
+; CHECK: %a = load i32* %p, !range ![[MERGED_TEST7:[0-9]+]]
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !7
+  %b = load i32* %p, !range !8
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test8(i32* %p) {
+; CHECK: @test8(i32* %p)
+; CHECK: %a = load i32* %p
+; CHECK-NOT: range
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !range !9
+  %b = load i32* %p, !range !10
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; CHECK: ![[DISJOINT_RANGE]] = metadata !{i32 0, i32 2, i32 3, i32 5}
+; CHECK: ![[MERGED_RANGE]] = metadata !{i32 0, i32 5}
+; CHECK: ![[MERGED_SIGNED_RANGE]] = metadata !{i32 -3, i32 -2, i32 1, i32 2}
+; CHECK: ![[MERGED_TEST6]] = metadata !{i32 10, i32 1}
+; CHECK: ![[MERGED_TEST7]] = metadata !{i32 3, i32 4, i32 5, i32 2}
+
+!0 = metadata !{i32 0, i32 2}
+!1 = metadata !{i32 3, i32 5}
+!2 = metadata !{i32 2, i32 5}
+!3 = metadata !{i32 -3, i32 -2}
+!4 = metadata !{i32 1, i32 2}
+!5 = metadata !{i32 10, i32 1}
+!6 = metadata !{i32 12, i32 13}
+!7 = metadata !{i32 1, i32 2, i32 3, i32 4}
+!8 = metadata !{i32 5, i32 1}
+!9 = metadata !{i32 1, i32 5}
+!10 = metadata !{i32 5, i32 1}
diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll
index 4797240..e7dc9c4 100644
--- a/test/Transforms/GVN/rle-must-alias.ll
+++ b/test/Transforms/GVN/rle-must-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
 
 ; GVN should eliminate the fully redundant %9 GEP which 
 ; allows DEAD to be removed.  This is PR3198.
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
index c6cd1fd..71aa548 100644
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ b/test/Transforms/GVN/rle-semidominated.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
 
 define i32 @main(i32* %p) {
 block1:
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
new file mode 100644
index 0000000..90661c6
--- /dev/null
+++ b/test/Transforms/GVN/tbaa.ll
@@ -0,0 +1,81 @@
+; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+
+define i32 @test1(i8* %p, i8* %q) {
+; CHECK: @test1(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p)
+; CHECK-NOT: tbaa
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !0
+  %b = call i32 @foo(i8* %p)
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test2(i8* %p, i8* %q) {
+; CHECK: @test2(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa !0
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !0
+  %b = call i32 @foo(i8* %p), !tbaa !0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test3(i8* %p, i8* %q) {
+; CHECK: @test3(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa !3
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !3
+  %b = call i32 @foo(i8* %p), !tbaa !3
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test4(i8* %p, i8* %q) {
+; CHECK: @test4(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !1
+  %b = call i32 @foo(i8* %p), !tbaa !0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test5(i8* %p, i8* %q) {
+; CHECK: @test5(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !0
+  %b = call i32 @foo(i8* %p), !tbaa !1
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test6(i8* %p, i8* %q) {
+; CHECK: @test6(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !0
+  %b = call i32 @foo(i8* %p), !tbaa !3
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test7(i8* %p, i8* %q) {
+; CHECK: @test7(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p)
+; CHECK-NOT: tbaa
+; CHECK: %c = add i32 %a, %a
+  %a = call i32 @foo(i8* %p), !tbaa !4
+  %b = call i32 @foo(i8* %p), !tbaa !3
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+declare i32 @foo(i8*) readonly
+
+!0 = metadata !{metadata !"C", metadata !1}
+!1 = metadata !{metadata !"A", metadata !2}
+!2 = metadata !{metadata !"tbaa root", null}
+!3 = metadata !{metadata !"B", metadata !1}
+!4 = metadata !{metadata !"another root", null}
diff --git a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
index 82abc8fe..7c07d5d 100644
--- a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
+++ b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -S | grep {16 x .31 x double.. zeroinitializer}
+; RUN: opt < %s -globalopt -S | grep "16 x .31 x double.. zeroinitializer"
 
 ; The 'X' indices could be larger than 31.  Do not SROA the outer indices of this array.
 @mm = internal global [16 x [31 x double]] zeroinitializer, align 32
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index 588d5c9..08b2cb1 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -S | grep {load volatile}
+; RUN: opt < %s -globalopt -S | grep "load volatile"
 @t0.1441 = internal global double 0x3FD5555555555555, align 8		; <double*> [#uses=1]
 
 define double @foo() nounwind  {
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index 5b06fea..d58becd 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -2,9 +2,9 @@
 ; alignments.  Elements 0 and 2 must be 16-byte aligned, and element 
 ; 1 must be at least 8 byte aligned (but could be more). 
 
-; RUN: opt < %s -globalopt -S | grep {@G.0 = internal unnamed_addr global .*align 16}
-; RUN: opt < %s -globalopt -S | grep {@G.1 = internal unnamed_addr global .*align 8}
-; RUN: opt < %s -globalopt -S | grep {@G.2 = internal unnamed_addr global .*align 16}
+; RUN: opt < %s -globalopt -S | grep "@G.0 = internal unnamed_addr global .*align 16"
+; RUN: opt < %s -globalopt -S | grep "@G.1 = internal unnamed_addr global .*align 8"
+; RUN: opt < %s -globalopt -S | grep "@G.2 = internal unnamed_addr global .*align 16"
 ; rdar://5891920
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
index c4b6e52..e76c44d 100644
--- a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
+++ b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -S | grep {phi.*@head}
+; RUN: opt < %s -globalopt -S | grep "phi.*@head"
 ; PR3321
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 3154856..0f3efa0 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -stats -disable-output |& grep "1 globalopt - Number of global vars shrunk to booleans"
+; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans"
 
 @Stop = internal global i32 0                     ; <i32*> [#uses=3]
 
diff --git a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
index d645ce4..059af1c 100644
--- a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
+++ b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -S | grep {@X = internal unnamed_addr global i32}
+; RUN: opt < %s -globalopt -S | grep "@X = internal unnamed_addr global i32"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 @X = internal global i32* null		; <i32**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll b/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
index 54e8f90..40862bd 100644
--- a/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
+++ b/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
@@ -17,7 +17,7 @@ define void @test() nounwind ssp {
   %2 = sext i32 %1 to i64                         ; <i64> [#uses=1]
   %3 = mul i64 %2, ptrtoint (%struct.strchartype* getelementptr (%struct.strchartype* null, i64 1) to i64) ; <i64> [#uses=1]
   %4 = tail call i8* @malloc(i64 %3)              ; <i8*> [#uses=1]
-; CHECK: call i8* @malloc(i64
+; CHECK-NOT: call i8* @malloc(i64
   %5 = bitcast i8* %4 to %struct.strchartype*     ; <%struct.strchartype*> [#uses=1]
   store %struct.strchartype* %5, %struct.strchartype** @chartypes, align 8
   ret void
diff --git a/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll b/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll
new file mode 100644
index 0000000..0c58c1a
--- /dev/null
+++ b/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; Check that the mere presence of a blockaddress doesn't prevent -globalopt
+; from promoting @f to fastcc.
+
+; CHECK: define{{.*}}fastcc{{.*}}@f
+define internal i8* @f() {
+  ret i8* blockaddress(@f, %L1)
+L1:
+  ret i8* null
+}
+
+define void @g() {
+  ; CHECK: call{{.*}}fastcc{{.*}}@f
+  %p = call i8* @f()
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll
new file mode 100644
index 0000000..a472f10
--- /dev/null
+++ b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll
@@ -0,0 +1,49 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+
+@glbl = internal global i8* null
+
+define void @test1a() {
+; CHECK: @test1a
+; CHECK-NOT: store
+; CHECK-NEXT: ret void
+  store i8* null, i8** @glbl
+  ret void
+}
+
+define void @test1b(i8* %p) {
+; CHECK: @test1b
+; CHECK-NEXT: store
+; CHECK-NEXT: ret void
+  store i8* %p, i8** @glbl
+  ret void
+}
+
+define void @test2() {
+; CHECK: @test2
+; CHECK: alloca i8
+  %txt = alloca i8
+  call void @foo2(i8* %txt)
+  %call2 = call i8* @strdup(i8* %txt)
+  store i8* %call2, i8** @glbl
+  ret void
+}
+declare i8* @strdup(i8*)
+declare void @foo2(i8*)
+
+define void @test3() uwtable {
+; CHECK: @test3
+; CHECK-NOT: bb1:
+; CHECK-NOT: bb2:
+; CHECK: invoke
+  %ptr = invoke i8* @_Znwm(i64 1)
+          to label %bb1 unwind label %bb2
+bb1:
+  store i8* %ptr, i8** @glbl
+  unreachable
+bb2:
+  %tmp1 = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0
+          cleanup
+  resume { i8*, i32 } %tmp1
+}
+declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+declare i8* @_Znwm(i64)
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index 099c607..be13a98 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -globalopt -S | \
-; RUN:   grep {internal fastcc float @foo}
+; RUN:   grep "internal fastcc float @foo"
 
 define internal float @foo() {
         ret float 0.000000e+00
diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll
index c8d8e76..cad5a91 100644
--- a/test/Transforms/GlobalOpt/deadglobal.ll
+++ b/test/Transforms/GlobalOpt/deadglobal.ll
@@ -1,9 +1,25 @@
-; RUN: opt < %s -globalopt -S | not grep internal
+; RUN: opt < %s -globalopt -S | FileCheck %s
 
-@G = internal global i32 123            ; <i32*> [#uses=1]
+@G1 = internal global i32 123            ; <i32*> [#uses=1]
 
-define void @foo() {
-        store i32 1, i32* @G
+; CHECK-NOT: @G1
+; CHECK: @G2
+; CHECK-NOT: @G3
+
+define void @foo1() {
+; CHECK: define void @foo
+; CHECK-NEXT: ret
+        store i32 1, i32* @G1
+        ret void
+}
+
+@G2 = linkonce_odr constant i32 42
+
+define void @foo2() {
+; CHECK: define void @foo2
+; CHECK-NEXT: store
+        store i32 1, i32* @G2
         ret void
 }
 
+@G3 = linkonce_odr constant i32 42
diff --git a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
index 1e0db6a..cc655e9 100644
--- a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
+++ b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -globalopt -S > %t
-; RUN: grep {@Y = internal unnamed_addr global \\\[3 x \[%\]struct.X\\\] zeroinitializer} %t
+; RUN: grep "@Y = internal unnamed_addr global \[3 x [%]struct.X\] zeroinitializer" %t
 ; RUN: grep load %t | count 6
-; RUN: grep {add i32 \[%\]a, \[%\]b} %t | count 3
+; RUN: grep "add i32 [%]a, [%]b" %t | count 3
 
 ; globalopt should not sra the global, because it can't see the index.
 
diff --git a/test/Transforms/GlobalOpt/heap-sra-phi.ll b/test/Transforms/GlobalOpt/heap-sra-phi.ll
index 6188e5a..123ad85 100644
--- a/test/Transforms/GlobalOpt/heap-sra-phi.ll
+++ b/test/Transforms/GlobalOpt/heap-sra-phi.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. }
-; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. }
+; RUN: opt < %s -globalopt -S | grep "tmp.f1 = phi i32. "
+; RUN: opt < %s -globalopt -S | grep "tmp.f0 = phi i32. "
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 	%struct.foo = type { i32, i32 }
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 59403b1..5a34a9c 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -instcombine | \
-; RUN:    llvm-dis | grep {ret i1 true}
+; RUN:    llvm-dis | grep "ret i1 true"
 
 ;; check that global opt turns integers that only hold 0 or 1 into bools.
 
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 94e07a0..dcfe009 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -S | \
-; RUN:   grep {G1 = internal unnamed_addr constant}
+; RUN:   grep "G1 = internal unnamed_addr constant"
 
 @G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 
diff --git a/test/Transforms/GlobalOpt/storepointer-compare.ll b/test/Transforms/GlobalOpt/storepointer-compare.ll
index 2f5ae86..09e20a8 100644
--- a/test/Transforms/GlobalOpt/storepointer-compare.ll
+++ b/test/Transforms/GlobalOpt/storepointer-compare.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -S | \
-; RUN:   grep {call void @Actual}
+; RUN:   grep "call void @Actual"
 
 ; Check that a comparison does not prevent an indirect call from being made 
 ; direct.  The global will still remain, but indirect call elim is still good.
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index be02821..ee75058 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -4,17 +4,31 @@
 @b = internal global i32 0, align 4
 @c = internal global i32 0, align 4
 @d = internal constant [4 x i8] c"foo\00", align 1
+@e = linkonce_odr global i32 0
 
 ; CHECK: @a = internal global i32 0, align 4
 ; CHECK: @b = internal global i32 0, align 4
 ; CHECK: @c = internal unnamed_addr global i32 0, align 4
 ; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1
+; CHECK: @e = linkonce_odr global i32 0
+
+define i32 @get_e() {
+       %t = load i32* @e
+       ret i32 %t
+}
+
+define void @set_e(i32 %x) {
+       store i32 %x, i32* @e
+       ret void
+}
 
 define i1 @bah(i64 %i) nounwind readonly optsize ssp {
 entry:
   %arrayidx4 = getelementptr inbounds [4 x i8]* @d, i64 0, i64 %i
   %tmp5 = load i8* %arrayidx4, align 1
-  %cmp = icmp eq i8 %tmp5, 42
+  %array0 = bitcast [4 x i8]* @d to i8*
+  %tmp6 = load i8* %array0, align 1
+  %cmp = icmp eq i8 %tmp5, %tmp6
   ret i1 %cmp
 }
 
diff --git a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
index 6640336..54a65d6 100644
--- a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
+++ b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -ipconstprop -S | grep {ret i32 %r}
+; RUN: opt < %s -ipconstprop -S | grep "ret i32 %r"
 ; Should not propagate the result of a weak function.
 ; PR2411
 
diff --git a/test/Transforms/IPConstantProp/return-argument.ll b/test/Transforms/IPConstantProp/return-argument.ll
index f4b7018..2a14f05 100644
--- a/test/Transforms/IPConstantProp/return-argument.ll
+++ b/test/Transforms/IPConstantProp/return-argument.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -ipconstprop -S > %t
-; RUN: cat %t | grep {store i32 %Z, i32\\* %Q}
-; RUN: cat %t | grep {add i32 1, 3}
+; RUN: cat %t | grep "store i32 %Z, i32\* %Q"
+; RUN: cat %t | grep "add i32 1, 3"
 
 ;; This function returns its second argument on all return statements
 define internal i32* @incdec(i1 %C, i32* %V) {
diff --git a/test/Transforms/IPConstantProp/return-constant.ll b/test/Transforms/IPConstantProp/return-constant.ll
index ff15df7..499d383 100644
--- a/test/Transforms/IPConstantProp/return-constant.ll
+++ b/test/Transforms/IPConstantProp/return-constant.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -ipconstprop -instcombine | \
-; RUN:    llvm-dis | grep {ret i1 true} | count 2
+; RUN:    llvm-dis | grep "ret i1 true" | count 2
 define internal i32 @foo(i1 %C) {
         br i1 %C, label %T, label %F
 
diff --git a/test/Transforms/IPConstantProp/return-constants.ll b/test/Transforms/IPConstantProp/return-constants.ll
index 2cd99fe..be2ca71 100644
--- a/test/Transforms/IPConstantProp/return-constants.ll
+++ b/test/Transforms/IPConstantProp/return-constants.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -ipconstprop -S > %t
 ;; Check that the 21 constants got propagated properly
-; RUN: cat %t | grep {%M = add i32 21, 21}
+; RUN: cat %t | grep "%M = add i32 21, 21"
 ;; Check that the second return values didn't get propagated
-; RUN: cat %t | grep {%N = add i32 %B, %D}
+; RUN: cat %t | grep "%N = add i32 %B, %D"
 
 %0 = type { i32, i32 }
 
diff --git a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
index 1ba6982..edeead1 100644
--- a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
+++ b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -indvars -S | \
-; RUN:   grep {ret i32 152}
+; RUN:   grep "ret i32 152"
 
 define i32 @main() {
 entry:
diff --git a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
index 1bbc631..c4e6cd4 100644
--- a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
+++ b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
@@ -1,6 +1,6 @@
 ; PR726
 ; RUN: opt < %s -indvars -S | \
-; RUN:   grep {ret i32 27}
+; RUN:   grep "ret i32 27"
 
 ; Make sure to compute the right exit value based on negative strides.
 
diff --git a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
index 268b8d1..6366c8c 100644
--- a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
+++ b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
@@ -1,5 +1,5 @@
 ; PR1015
-; RUN: opt < %s -indvars -S | not grep {ret i32 0}
+; RUN: opt < %s -indvars -S | not grep "ret i32 0"
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
index dd400be..b461566 100644
--- a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | not grep {sext}
+; RUN: opt < %s -indvars -S | not grep "sext"
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
 target triple = "x86_64-apple-darwin9.6"
diff --git a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
index 55e8a50..0722d89 100644
--- a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -instcombine -S | not grep {\[sz\]ext}
+; RUN: opt < %s -indvars -instcombine -S | not grep "[sz]ext"
 ; ModuleID = '<stdin>'
 ;extern int *a, *b, *c, *d, *e, *f;  /* 64 bit */
 ;extern int K[256];
diff --git a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
new file mode 100644
index 0000000..7c5f818
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; PR13371: indvars pass incorrectly substitutes 'undef' values
+;
+; LFTR should not user %undef as the loop counter.
+; CHECK: @test
+; CHECK-NOT: icmp{{.*}}undef
+@.str3 = private constant [6 x i8] c"%lld\0A\00", align 1
+declare i32 @printf(i8* noalias nocapture, ...) nounwind
+define i64 @test() nounwind {
+func_start:
+  br label %block9
+block9:                                           ; preds = %block9,%func_start
+  %undef = phi i64 [ %next_undef, %block9 ], [ undef, %func_start ]
+  %iter = phi i64 [ %next_iter, %block9 ], [ 1, %func_start ]
+  %next_iter = add nsw i64 %iter, 1
+  %0 = tail call i32 (i8*, ...)* @printf(i8* noalias nocapture getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i64 %next_iter, i64 %undef)
+  %next_undef = add nsw i64 %undef, 1
+  %_tmp_3 = icmp slt i64 %next_iter, 100
+  br i1 %_tmp_3, label %block9, label %exit
+exit:                                             ; preds = %block9
+  ret i64 0
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-max.ll b/test/Transforms/IndVarSimplify/eliminate-max.ll
index c25bd0e..98510ea 100644
--- a/test/Transforms/IndVarSimplify/eliminate-max.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-max.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -indvars | grep {= icmp} | count 3
+; RUN: opt < %s -S -indvars | grep "= icmp" | count 3
 ; PR4914.ll
 
 ; Indvars should be able to do range analysis and eliminate icmps.
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 9abfe13..7fb36e5 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -153,6 +153,9 @@ return:
 ; Remove %i which is only used by the exit test.
 ; Verify that SCEV can still compute a backedge count from the sign
 ; extended %n, used for pointer comparison by LFTR.
+;
+; TODO: Fix for PR13371 currently makes this impossible. See
+; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules.
 define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
 entry:
   %x.ext = sext i32 %x to i64
@@ -162,13 +165,13 @@ entry:
   %lim = add i32 %x, %n
   %cmp.ph = icmp ult i32 %x, %lim
   br i1 %cmp.ph, label %loop, label %exit
-
+; CHECK: @geplftr
 ; CHECK: loop:
 ; CHECK: phi i8*
-; CHECK-NOT: phi
+; DISABLE-NOT: phi      // This check is currently disabled
 ; CHECK: getelementptr
 ; CHECK: store
-; CHECK: icmp ne i8*
+; DISABLE: icmp ne i8*  // This check is currently disabled
 ; CHECK: br i1
 loop:
   %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
@@ -187,7 +190,7 @@ exit:
 define void @nevertaken() nounwind uwtable ssp {
 entry:
   br label %loop
-
+; CHECK: @nevertaken
 ; CHECK: loop:
 ; CHECK-NOT: phi
 ; CHECK-NOT: add
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index c3619f6..e51a341 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -indvars -S \
-; RUN:   | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
+; RUN:   | grep "%b.1 = phi i32 [ 2, %bb ], [ 1, %bb2 ]"
 ;
 ; This loop has multiple exits, and the value of %b1 depends on which
 ; exit is taken. Indvars should correctly compute the exit values.
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
index 9f3bcaf..21fb7ef 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S > %t
-; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
-; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
+; RUN: grep "[%]tmp7 = icmp eq i8 -28, -28" %t
+; RUN: grep "[%]tmp8 = icmp eq i8 63, 63" %t
 ; PR4477
 ; Indvars should compute the exit values in loop.
 ;
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
index 65c66f7..0c1b590 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | grep {ret i32 600000}
+; RUN: opt < %s -indvars -S | grep "ret i32 600000"
 ; PR1179
 
 define i32 @foo() {
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
index e4b642c..d7eb406 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | grep {ret i32 9900}
+; RUN: opt < %s -indvars -S | grep "ret i32 9900"
 ; PR1179
 
 define i32 @test4() {
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
index 80b961a..38f95bf 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | grep {120, %bb2.bb3_crit_edge}
+; RUN: opt < %s -indvars -S | grep "120, %bb2.bb3_crit_edge"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/Transforms/IndVarSimplify/shrunk-constant.ll b/test/Transforms/IndVarSimplify/shrunk-constant.ll
index 271f8ed..45297d6 100644
--- a/test/Transforms/IndVarSimplify/shrunk-constant.ll
+++ b/test/Transforms/IndVarSimplify/shrunk-constant.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep {\\-->  (zext i4 {-7,+,-8}<%loop> to i32)}
+; RUN:  | grep "\-->  (zext i4 {-7,+,-8}<%loop> to i32)"
 
 define fastcc void @foo() nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
new file mode 100644
index 0000000..c58a3af
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+define void @test1(float* nocapture %autoc, float* nocapture %data, float %d, i32 %data_len, i32 %sample) nounwind {
+entry:
+  %sub = sub i32 %data_len, %sample
+  %cmp4 = icmp eq i32 %data_len, %sample
+  br i1 %cmp4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %0 = trunc i64 %indvars.iv to i32
+  %add = add i32 %0, %sample
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds float* %data, i64 %idxprom
+  %1 = load float* %arrayidx, align 4
+  %mul = fmul float %1, %d
+  %arrayidx2 = getelementptr inbounds float* %autoc, i64 %indvars.iv
+  %2 = load float* %arrayidx2, align 4
+  %add3 = fadd float %2, %mul
+  store float %add3, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ult i32 %3, %sub
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+
+; CHECK: @test1
+
+; First check that we move the sub into the preheader, it doesn't have to be
+; executed if %cmp4 == false
+; CHECK: for.body.preheader:
+; CHECK: sub i32 %data_len, %sample
+; CHECK: br label %for.body
+
+; Second, check that we turn the IV test into an eq.
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
+}
+
diff --git a/test/Transforms/Inline/2007-04-15-InlineEH.ll b/test/Transforms/Inline/2007-04-15-InlineEH.ll
index 8fbcf92..b114537 100644
--- a/test/Transforms/Inline/2007-04-15-InlineEH.ll
+++ b/test/Transforms/Inline/2007-04-15-InlineEH.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | not grep {invoke void asm}
+; RUN: opt < %s -inline -S | not grep "invoke void asm"
 ; PR1335
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/Inline/casts.ll b/test/Transforms/Inline/casts.ll
index 166185a..a7b051b 100644
--- a/test/Transforms/Inline/casts.ll
+++ b/test/Transforms/Inline/casts.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | grep {ret i32 1}
+; RUN: opt < %s -inline -S | grep "ret i32 1"
 ; ModuleID = 'short.opt.bc'
 
 define i32 @testBool(i1 %X) {
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
index 3505608..7716d6a 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/delete-call.ll
@@ -1,5 +1,5 @@
-; RUN: opt %s -S  -inline -functionattrs -stats |& grep {Number of call sites deleted, not inlined}
-; RUN: opt %s -S  -inline -stats |& grep {Number of functions inlined}
+; RUN: opt %s -S  -inline -functionattrs -stats 2>&1 | grep "Number of call sites deleted, not inlined"
+; RUN: opt %s -S  -inline -stats 2>&1 | grep "Number of functions inlined"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.8"
diff --git a/test/Transforms/Inline/externally_available.ll b/test/Transforms/Inline/externally_available.ll
index 08b5638..07274e7 100644
--- a/test/Transforms/Inline/externally_available.ll
+++ b/test/Transforms/Inline/externally_available.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -inline -constprop -S > %t
 ; RUN: not grep test_function %t
-; RUN: grep {ret i32 5} %t
+; RUN: grep "ret i32 5" %t
 
 
 ; test_function should not be emitted to the .s file.
diff --git a/test/Transforms/Inline/inline-byval-bonus.ll b/test/Transforms/Inline/inline-byval-bonus.ll
new file mode 100644
index 0000000..f3ed819
--- /dev/null
+++ b/test/Transforms/Inline/inline-byval-bonus.ll
@@ -0,0 +1,193 @@
+; RUN: opt -S -inline -inline-threshold=275 < %s | FileCheck %s
+; PR13095
+
+; The performance of the c-ray benchmark largely depends on the inlining of a
+; specific call to @ray_sphere. This test case is designed to verify that it's
+; inlined at -O3.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.sphere = type { %struct.vec3, double, %struct.material, %struct.sphere* }
+%struct.vec3 = type { double, double, double }
+%struct.material = type { %struct.vec3, double, double }
+%struct.ray = type { %struct.vec3, %struct.vec3 }
+%struct.spoint = type { %struct.vec3, %struct.vec3, %struct.vec3, double }
+
+define i32 @caller(%struct.sphere* %i) {
+  %shadow_ray = alloca %struct.ray, align 8
+  call void @fix(%struct.ray* %shadow_ray)
+
+  %call = call i32 @ray_sphere(%struct.sphere* %i, %struct.ray* byval align 8 %shadow_ray, %struct.spoint* null)
+  ret i32 %call
+
+; CHECK: @caller
+; CHECK-NOT: call i32 @ray_sphere
+; CHECK: ret i32
+}
+
+declare void @fix(%struct.ray*)
+
+define i32 @ray_sphere(%struct.sphere* nocapture %sph, %struct.ray* nocapture byval align 8 %ray, %struct.spoint* %sp) nounwind uwtable ssp {
+  %1 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 0
+  %2 = load double* %1, align 8
+  %3 = fmul double %2, %2
+  %4 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 1
+  %5 = load double* %4, align 8
+  %6 = fmul double %5, %5
+  %7 = fadd double %3, %6
+  %8 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 2
+  %9 = load double* %8, align 8
+  %10 = fmul double %9, %9
+  %11 = fadd double %7, %10
+  %12 = fmul double %2, 2.000000e+00
+  %13 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 0
+  %14 = load double* %13, align 8
+  %15 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 0
+  %16 = load double* %15, align 8
+  %17 = fsub double %14, %16
+  %18 = fmul double %12, %17
+  %19 = fmul double %5, 2.000000e+00
+  %20 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 1
+  %21 = load double* %20, align 8
+  %22 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 1
+  %23 = load double* %22, align 8
+  %24 = fsub double %21, %23
+  %25 = fmul double %19, %24
+  %26 = fadd double %18, %25
+  %27 = fmul double %9, 2.000000e+00
+  %28 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 2
+  %29 = load double* %28, align 8
+  %30 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 2
+  %31 = load double* %30, align 8
+  %32 = fsub double %29, %31
+  %33 = fmul double %27, %32
+  %34 = fadd double %26, %33
+  %35 = fmul double %16, %16
+  %36 = fmul double %23, %23
+  %37 = fadd double %35, %36
+  %38 = fmul double %31, %31
+  %39 = fadd double %37, %38
+  %40 = fmul double %14, %14
+  %41 = fadd double %40, %39
+  %42 = fmul double %21, %21
+  %43 = fadd double %42, %41
+  %44 = fmul double %29, %29
+  %45 = fadd double %44, %43
+  %46 = fsub double -0.000000e+00, %16
+  %47 = fmul double %14, %46
+  %48 = fmul double %21, %23
+  %49 = fsub double %47, %48
+  %50 = fmul double %29, %31
+  %51 = fsub double %49, %50
+  %52 = fmul double %51, 2.000000e+00
+  %53 = fadd double %52, %45
+  %54 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 1
+  %55 = load double* %54, align 8
+  %56 = fmul double %55, %55
+  %57 = fsub double %53, %56
+  %58 = fmul double %34, %34
+  %59 = fmul double %11, 4.000000e+00
+  %60 = fmul double %59, %57
+  %61 = fsub double %58, %60
+  %62 = fcmp olt double %61, 0.000000e+00
+  br i1 %62, label %130, label %63
+
+; <label>:63                                      ; preds = %0
+  %64 = tail call double @sqrt(double %61) nounwind readnone
+  %65 = fsub double -0.000000e+00, %34
+  %66 = fsub double %64, %34
+  %67 = fmul double %11, 2.000000e+00
+  %68 = fdiv double %66, %67
+  %69 = fsub double %65, %64
+  %70 = fdiv double %69, %67
+  %71 = fcmp olt double %68, 1.000000e-06
+  %72 = fcmp olt double %70, 1.000000e-06
+  %or.cond = and i1 %71, %72
+  br i1 %or.cond, label %130, label %73
+
+; <label>:73                                      ; preds = %63
+  %74 = fcmp ogt double %68, 1.000000e+00
+  %75 = fcmp ogt double %70, 1.000000e+00
+  %or.cond1 = and i1 %74, %75
+  br i1 %or.cond1, label %130, label %76
+
+; <label>:76                                      ; preds = %73
+  %77 = icmp eq %struct.spoint* %sp, null
+  br i1 %77, label %130, label %78
+
+; <label>:78                                      ; preds = %76
+  %t1.0 = select i1 %71, double %70, double %68
+  %t2.0 = select i1 %72, double %t1.0, double %70
+  %79 = fcmp olt double %t1.0, %t2.0
+  %80 = select i1 %79, double %t1.0, double %t2.0
+  %81 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 3
+  store double %80, double* %81, align 8
+  %82 = fmul double %80, %2
+  %83 = fadd double %14, %82
+  %84 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 0
+  store double %83, double* %84, align 8
+  %85 = fmul double %5, %80
+  %86 = fadd double %21, %85
+  %87 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 1
+  store double %86, double* %87, align 8
+  %88 = fmul double %9, %80
+  %89 = fadd double %29, %88
+  %90 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 2
+  store double %89, double* %90, align 8
+  %91 = load double* %15, align 8
+  %92 = fsub double %83, %91
+  %93 = load double* %54, align 8
+  %94 = fdiv double %92, %93
+  %95 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 0
+  store double %94, double* %95, align 8
+  %96 = load double* %22, align 8
+  %97 = fsub double %86, %96
+  %98 = load double* %54, align 8
+  %99 = fdiv double %97, %98
+  %100 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 1
+  store double %99, double* %100, align 8
+  %101 = load double* %30, align 8
+  %102 = fsub double %89, %101
+  %103 = load double* %54, align 8
+  %104 = fdiv double %102, %103
+  %105 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 2
+  store double %104, double* %105, align 8
+  %106 = fmul double %2, %94
+  %107 = fmul double %5, %99
+  %108 = fadd double %106, %107
+  %109 = fmul double %9, %104
+  %110 = fadd double %108, %109
+  %111 = fmul double %110, 2.000000e+00
+  %112 = fmul double %94, %111
+  %113 = fsub double %112, %2
+  %114 = fsub double -0.000000e+00, %113
+  %115 = fmul double %99, %111
+  %116 = fsub double %115, %5
+  %117 = fsub double -0.000000e+00, %116
+  %118 = fmul double %104, %111
+  %119 = fsub double %118, %9
+  %120 = fsub double -0.000000e+00, %119
+  %.06 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 0
+  %.18 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 1
+  %.210 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 2
+  %121 = fmul double %113, %113
+  %122 = fmul double %116, %116
+  %123 = fadd double %121, %122
+  %124 = fmul double %119, %119
+  %125 = fadd double %123, %124
+  %126 = tail call double @sqrt(double %125) nounwind readnone
+  %127 = fdiv double %114, %126
+  store double %127, double* %.06, align 8
+  %128 = fdiv double %117, %126
+  store double %128, double* %.18, align 8
+  %129 = fdiv double %120, %126
+  store double %129, double* %.210, align 8
+  br label %130
+
+; <label>:130                                     ; preds = %78, %76, %73, %63, %0
+  %.0 = phi i32 [ 0, %0 ], [ 0, %73 ], [ 0, %63 ], [ 1, %76 ], [ 1, %78 ]
+  ret i32 %.0
+}
+
+declare double @sqrt(double) nounwind readnone
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 1f34113..e077523 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | not grep {tail call void @llvm.memcpy.i32}
+; RUN: opt < %s -inline -S | not grep "tail call void @llvm.memcpy.i32"
 ; PR3550
 
 define internal void @foo(i32* %p, i32* %q) {
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
new file mode 100644
index 0000000..20d7426
--- /dev/null
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -Oz %s | FileCheck %s -check-prefix=OZ
+; RUN: opt -S -O2 %s | FileCheck %s -check-prefix=O2
+
+; The inline threshold for a function with the optsize attribute is currently
+; the same as the global inline threshold for -Os. Check that the optsize
+; function attribute don't alter the function specific inline threshold if the
+; global inline threshold is lower (as for -Oz).
+
+@a = global i32 4
+
+; This function should be larger than the inline threshold for -Oz (25), but
+; smaller than the inline threshold for optsize (75).
+define i32 @inner() {
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1,  %a1
+  %a2 = load volatile i32* @a
+  %x2 = add i32 %x1, %a2
+  %a3 = load volatile i32* @a
+  %x3 = add i32 %x2, %a3
+  %a4 = load volatile i32* @a
+  %x4 = add i32 %x3, %a4
+  %a5 = load volatile i32* @a
+  %x5 = add i32 %x3, %a5
+  ret i32 %x5
+}
+
+; @inner() should be inlined for -O2 but not for -Oz.
+; OZ: call
+; O2-NOT: call
+define i32 @outer() optsize {
+   %r = call i32 @inner()
+   ret i32 %r
+}
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index dc35b60..0b48a72 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -110,3 +110,65 @@ bb.merge:
 bb.false:
   ret i32 %sub
 }
+
+
+define i32 @PR13412.main() {
+; This is a somewhat complicated three layer subprogram that was reported to
+; compute the wrong value for a branch due to assuming that an argument
+; mid-inline couldn't be equal to another pointer.
+;
+; After inlining, the branch should point directly to the exit block, not to
+; the intermediate block.
+; CHECK: @PR13412.main
+; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]]
+; CHECK: [[FALSE_DEST]]:
+; CHECK-NEXT: call void @PR13412.fail()
+; CHECK: [[TRUE_DEST]]:
+; CHECK-NEXT: ret i32 0
+
+entry:
+  %i1 = alloca i64
+  store i64 0, i64* %i1
+  %arraydecay = bitcast i64* %i1 to i32*
+  %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay)
+  br i1 %call, label %cond.end, label %cond.false
+
+cond.false:
+  call void @PR13412.fail()
+  br label %cond.end
+
+cond.end:
+  ret i32 0
+}
+
+define internal i1 @PR13412.first(i32* %a, i32* %b) {
+entry:
+  %call = call i32* @PR13412.second(i32* %a, i32* %b)
+  %cmp = icmp eq i32* %call, %b
+  ret i1 %cmp
+}
+
+declare void @PR13412.fail()
+
+define internal i32* @PR13412.second(i32* %a, i32* %b) {
+entry:
+  %sub.ptr.lhs.cast = ptrtoint i32* %b to i64
+  %sub.ptr.rhs.cast = ptrtoint i32* %a to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2
+  %cmp = icmp ugt i64 %sub.ptr.div, 1
+  br i1 %cmp, label %if.then, label %if.end3
+
+if.then:
+  %0 = load i32* %a
+  %1 = load i32* %b
+  %cmp1 = icmp eq i32 %0, %1
+  br i1 %cmp1, label %return, label %if.end3
+
+if.end3:
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ]
+  ret i32* %retval.0
+}
diff --git a/test/Transforms/Inline/inline_prune.ll b/test/Transforms/Inline/inline_prune.ll
index 658a422..4c1574d 100644
--- a/test/Transforms/Inline/inline_prune.ll
+++ b/test/Transforms/Inline/inline_prune.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -inline -S | \
-; RUN:    not grep {callee\[12\](}
+; RUN:    not grep "callee[12]("
 ; RUN: opt < %s -inline -S | not grep mul
 
 define internal i32 @callee1(i32 %A, i32 %B) {
diff --git a/test/Transforms/Inline/invoke_test-1.ll b/test/Transforms/Inline/invoke_test-1.ll
index e0e6d60..922351f 100644
--- a/test/Transforms/Inline/invoke_test-1.ll
+++ b/test/Transforms/Inline/invoke_test-1.ll
@@ -2,7 +2,7 @@
 ; instructions
 
 ; RUN: opt < %s -inline -S | \
-; RUN:   not grep {call\[^e\]}
+; RUN:   not grep "call[^e]"
 
 declare void @might_throw()
 
diff --git a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
index 1154bb4..4233797 100644
--- a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
+++ b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {ret i1 false}
+; RUN:    grep "ret i1 false"
 
 define i1 @test(i1 %V) {
         %Y = icmp ult i1 %V, false              ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
index 8169d21..d17db8d 100644
--- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
+++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -mem2reg -S | \
-; RUN:   not grep {i32 1}
+; RUN:   not grep "i32 1"
 
 ; When propagating the load through the select, make sure that the load is
 ; inserted where the original load was, not where the select is.  Not doing
diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
index e646edf..0d5fc81 100644
--- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
+++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -mem2reg -simplifycfg | \
-; RUN:   llvm-dis | grep -v store | not grep {i32 1}
+; RUN:   llvm-dis | grep -v store | not grep "i32 1"
 
 ; Test to make sure that instcombine does not accidentally propagate the load
 ; into the PHI, which would break the program.
diff --git a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
index 38553d7..02bc043 100644
--- a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
+++ b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   not grep {ret i1 false}
+; RUN:   not grep "ret i1 false"
 
 define i1 @test(i64 %tmp.169) {
         %tmp.1710 = lshr i64 %tmp.169, 1                ; <i64> [#uses=1]
diff --git a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
index 3d887dd..0a513c6 100644
--- a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
+++ b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {ret i1 true}
+; RUN:   grep "ret i1 true"
 ; PR586
 
 @g_07918478 = external global i32               ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
index 5a74bd2..295006c 100644
--- a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {icmp sgt}
+; RUN:   grep "icmp sgt"
 ; END.
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
index c3700a0..0c8eece 100644
--- a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
+++ b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | \
 ; RUN:   grep icmp | count 1
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {icmp ugt} | count 1
+; RUN:   grep "icmp ugt" | count 1
 ; END.
 
 target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
index e5238a5..635a09c 100644
--- a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
+++ b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp ugt}
+; RUN: opt < %s -instcombine -S | grep "icmp ugt"
 ; PR1107
 ; PR1940
 
diff --git a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
index d2d215f..4fcfd26 100644
--- a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
+++ b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {fcmp uno.*0.0}
+; RUN: opt < %s -instcombine -S | grep "fcmp uno.*0.0"
 ; PR1111
 define i1 @test(double %X) {
   %tmp = fcmp une double %X, %X
diff --git a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
index bd15dce..4d1b982 100644
--- a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
+++ b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ugt.*, 1}
+; RUN: opt < %s -instcombine -S | grep "ugt.*, 1"
 
 define i1 @test(i32 %tmp1030) {
 	%tmp1037 = icmp ne i32 %tmp1030, 40		; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
index 05891a2..e2bebec 100644
--- a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
+++ b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -instcombine -mem2reg -S | grep {%A = alloca} 
+; RUN: opt < %s -instcombine -mem2reg -S | grep "%A = alloca" 
 ; RUN: opt < %s -instcombine -mem2reg -S | \
-; RUN:    not grep {%B = alloca}
+; RUN:    not grep "%B = alloca"
 ; END.
 
 ; Ensure that instcombine doesn't sink the loads in entry/cond_true into 
diff --git a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
index 109e4a2..826d68a 100644
--- a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
+++ b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp sle}
+; RUN: opt < %s -instcombine -S | grep "icmp sle"
 ; PR1244
 
 define i1 @test(i32 %c.3.i, i32 %d.292.2.i) {
diff --git a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
index ca93af3..719da70 100644
--- a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
+++ b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
@@ -1,5 +1,5 @@
 ; For PR1248
-; RUN: opt < %s -instcombine -S | grep {ugt i32 .*, 11}
+; RUN: opt < %s -instcombine -S | grep "ugt i32 .*, 11"
 define i1 @test(i32 %tmp6) {
   %tmp7 = sdiv i32 %tmp6, 12     ; <i32> [#uses=1]
   icmp ne i32 %tmp7, -6           ; <i1>:1 [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
index c794004..7e9c9e2 100644
--- a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
+++ b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
@@ -1,6 +1,6 @@
 ; PR1271
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {icmp eq i32 .tmp.*, 2146435072}
+; RUN:    grep "icmp eq i32 .tmp.*, 2146435072"
 %struct..0anon = type { i32, i32 }
 %struct..1anon = type { double }
 
diff --git a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
index 807efcf..c4070a1 100644
--- a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
+++ b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
@@ -1,6 +1,6 @@
 ; PR1271
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {ashr exact i32 %.mp137, 2}
+; RUN:    grep "ashr exact i32 %.mp137, 2"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
index 15988b6..eb0c364 100644
--- a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
+++ b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {call.*sret}
+; RUN: opt < %s -instcombine -S | grep "call.*sret"
 ; Make sure instcombine doesn't drop the sret attribute.
 
 define void @blah(i16* %tmp10) {
diff --git a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
index 62b9351..082b215 100644
--- a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
+++ b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ashr}
+; RUN: opt < %s -instcombine -S | grep "ashr"
 ; PR1499
 
 define void @av_cmp_q_cond_true(i32* %retval, i32* %tmp9, i64* %tmp10) {
diff --git a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
index af539c1..b2b04d6 100644
--- a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 true}
+; RUN: opt < %s -instcombine -S | grep "ret i1 true"
 ; rdar://5278853
 
 define i1 @test(i32 %tmp468) {
diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
index 4c5c367..95a445c 100644
--- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll
+++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {call.*stacksave}
+; RUN: opt < %s -instcombine -S | grep "call.*stacksave"
 ; PR1745
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
index 5282739..6b83dd9 100644
--- a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 1}
+; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 1"
 ; PR1800
 
 define i1 @test(i32 %In) {
diff --git a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
index 6420537b..89f8672 100644
--- a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
+++ b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
-; RUN: opt < %s -instcombine -S | not grep {ret i1 0}
+; RUN: opt < %s -instcombine -S | not grep "ret i1 0"
 ; PR1850
 
 define i1 @test() {
diff --git a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
index cc89f6d..3745e87 100644
--- a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
+++ b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {add} | count 1
+; RUN: opt < %s -instcombine -S | grep "add" | count 1
 
 define i32 @foo(i32 %a) {
 entry:
diff --git a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
index 28a94ce..1f9c47c 100644
--- a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
+++ b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep {a.off}
+; RUN: opt < %s -instcombine -S | not grep "a.off"
 ; PR1949
 
 define i1 @test1(i32 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
index af61c15..917d3d9 100644
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
+++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i.* 0} | count 2
+; RUN: opt < %s -instcombine -S | grep "ret i.* 0" | count 2
 ; PR2048
 
 define i32 @i(i32 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
index d26dec1..854f8cb 100644
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
+++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {sdiv i8 \%a, 9}
+; RUN: opt < %s -instcombine -S | grep "sdiv i8 \%a, 9"
 ; PR2048
 
 define i8 @i(i8 %a) {
diff --git a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
index da7e49e..0fa4d71 100644
--- a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
+++ b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {16} | count 1
+; RUN: opt < %s -instcombine -S | grep "16" | count 1
 
 define i8* @bork(i8** %qux) {
   %tmp275 = load i8** %qux, align 1
diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
index de08c32..dba6cdb 100644
--- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
+++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store volatile}
+; RUN: opt < %s -instcombine -S | grep "store volatile"
 
 define void @test() {
 	%votf = alloca <4 x float>		; <<4 x float>*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index 1286e3d..fd0217e 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 @g_1 = internal global i32 0		; <i32*> [#uses=3]
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index ebbd3a7..8022414 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
 ; PR2262
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
index bbd0042..7a1c844 100644
--- a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
+++ b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store i8} | count 3
+; RUN: opt < %s -instcombine -S | grep "store i8" | count 3
 ; PR2297
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
index b34fc1e..a0e95a9 100644
--- a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
+++ b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 false} | count 2
+; RUN: opt < %s -instcombine -S | grep "ret i1 false" | count 2
 ; PR2329
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
index 2de5af7..acb259b 100644
--- a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
+++ b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
 ; PR2359
 define i1 @f(i8* %x) {
 entry:
diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
index 5416693..ed20690 100644
--- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll
+++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {xor}
+; RUN: opt < %s -instcombine -S | grep "xor"
 ; PR2389
 
 define i1 @test(i1 %a, i1 %b) {
diff --git a/test/Transforms/InstCombine/2008-05-31-Bools.ll b/test/Transforms/InstCombine/2008-05-31-Bools.ll
index a0fe47a..7c33f2d 100644
--- a/test/Transforms/InstCombine/2008-05-31-Bools.ll
+++ b/test/Transforms/InstCombine/2008-05-31-Bools.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S > %t
-; RUN: grep {xor} %t
-; RUN: grep {and} %t
-; RUN: not grep {div} %t
+; RUN: grep "xor" %t
+; RUN: grep "and" %t
+; RUN: not grep "div" %t
 
 define i1 @foo1(i1 %a, i1 %b) {
   %A = sub i1 %a, %b
diff --git a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
index 917d3ae..ec94623 100644
--- a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
+++ b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {phi i32} | count 2
+; RUN: opt < %s -instcombine -S | grep "phi i32" | count 2
 
 define void @test() nounwind  {
 entry:
diff --git a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
index 08959c9..cc46926 100644
--- a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
+++ b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store i32} | count 2
+; RUN: opt < %s -instcombine -S | grep "store i32" | count 2
 
 @g_139 = global i32 0           ; <i32*> [#uses=2]
 
diff --git a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
index aed1b14..bf5e96b 100644
--- a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
+++ b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store i8} | count 2
+; RUN: opt < %s -instcombine -S | grep "store i8" | count 2
 
 define i32 @a(i8* %s) nounwind  {
 entry:
diff --git a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
index c3371c6..80bd83b 100644
--- a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 15}
+; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 15"
 ; PR2479
 ; (See also PR1800.)
 
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
index 4f4709b..9c4c1b5 100644
--- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
+++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {call.*llvm.stackrestore}
+; RUN: opt < %s -instcombine -S | grep "call.*llvm.stackrestore"
 ; PR2488
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
index 8245b4d..cfca72a 100644
--- a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
+++ b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp ne i32 \%a}
+; RUN: opt < %s -instcombine -S | grep "icmp ne i32 \%a"
 ; PR2330
 
 define i1 @foo(i32 %a) nounwind  {
diff --git a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
index 0091159..a3d44cb 100644
--- a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
+++ b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep -v {i32 8}
+; RUN: opt < %s -instcombine -S | grep -v "i32 8"
 ; PR2330
 
 define i32 @a(i32 %a) nounwind  {
diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index 1ed5323..dcf4bef 100644
--- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
 ; PR2496
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
index 47a7590..ed01414 100644
--- a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
+++ b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep {sub i32 0}
+; RUN: opt < %s -instcombine -S | not grep "sub i32 0"
 ; PR2330
 
 define i32 @foo(i32 %a) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
index e911532..786f0c5 100644
--- a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
+++ b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {%C = xor i1 %A, true}
-; RUN: opt < %s -instcombine -S | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep "%C = xor i1 %A, true"
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
 ; PR2539
 
 define i1 @test1(i1 %A) {
diff --git a/test/Transforms/InstCombine/2008-07-13-DivZero.ll b/test/Transforms/InstCombine/2008-07-13-DivZero.ll
index be1f8c2..18c99542 100644
--- a/test/Transforms/InstCombine/2008-07-13-DivZero.ll
+++ b/test/Transforms/InstCombine/2008-07-13-DivZero.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {lshr.*3}
-; RUN: opt < %s -instcombine -S | grep {call .*%cond}
+; RUN: opt < %s -instcombine -S | grep "lshr.*3"
+; RUN: opt < %s -instcombine -S | grep "call .*%cond"
 ; PR2506
 
 ; We can simplify the operand of udiv to '8', but not the operand to the
diff --git a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
index 501d8a6..b469887 100644
--- a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
+++ b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep {store }
+; RUN: opt < %s -instcombine -S | not grep "store "
 ; PR2296
 
 @G = common global double 0.000000e+00, align 16
diff --git a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
index 31ea94a..4d00d49 100644
--- a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
+++ b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {or i1}
+; RUN: opt < %s -instcombine -S | grep "or i1"
 ; PR2844
 
 define i32 @test(i32 %p_74) {
diff --git a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
index fd36d86..cf29f8d 100644
--- a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
+++ b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
 ; PR2697
 
 define i1 @x(i32 %x) nounwind {
diff --git a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
index aa077e2..679cc5f 100644
--- a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
+++ b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 true}
+; RUN: opt < %s -instcombine -S | grep "ret i1 true"
 ; PR2993
 
 define i1 @foo(i32 %x) {
diff --git a/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
index c636288..f33a1f5 100644
--- a/test/Transforms/InstCombine/2008-11-08-FCmp.ll
+++ b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
@@ -45,3 +45,12 @@ define i1 @test6(i32 %val) {
   ret i1 %2
 ; CHECK: ret i1 false
 }
+
+; Check that optimizing unsigned >= comparisons correctly distinguishes
+; positive and negative constants.  <rdar://problem/12029145>
+define i1 @test7(i32 %val) {
+  %1 = uitofp i32 %val to double
+  %2 = fcmp oge double %1, 3.200000e+00
+  ret i1 %2
+; CHECK: icmp ugt i32 %val, 3
+}
diff --git a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
index e4c7ebc..75bd5e0 100644
--- a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
+++ b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {i8 2, i8 2}
+; RUN: opt < %s -instcombine -S | grep "i8 2, i8 2"
 ; PR2756
 
 define <2 x i8> @foo(<2 x i8> %x) {
diff --git a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
index a61a94e..50ea2f4 100644
--- a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
+++ b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S > %t
-; RUN: grep {, align 4} %t | count 3
-; RUN: grep {, align 8} %t | count 3
+; RUN: grep ", align 4" %t | count 3
+; RUN: grep ", align 8" %t | count 3
 ; rdar://6480438
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
index ce62f35..949fc59 100644
--- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store.*addrspace(1)}
+; RUN: opt < %s -instcombine -S | grep "store.*addrspace(1)"
 ; PR3335
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
diff --git a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
index 1421347..68c51b4 100644
--- a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
+++ b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x7FF8000000000000 | count 12
-; RUN: opt < %s -simplifycfg -instcombine -S | grep {0\\.0} | count 3
-; RUN: opt < %s -simplifycfg -instcombine -S | grep {3\\.5} | count 1
+; RUN: opt < %s -simplifycfg -instcombine -S | grep "0\.0" | count 3
+; RUN: opt < %s -simplifycfg -instcombine -S | grep "3\.5" | count 1
 ;
 
 ; ModuleID = 'apf.c'
diff --git a/test/Transforms/InstCombine/2009-01-31-Pressure.ll b/test/Transforms/InstCombine/2009-01-31-Pressure.ll
index c3ee9a3..666b02e 100644
--- a/test/Transforms/InstCombine/2009-01-31-Pressure.ll
+++ b/test/Transforms/InstCombine/2009-01-31-Pressure.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {%B = add i8 %b, %x}
+; RUN: opt < %s -instcombine -S | grep "%B = add i8 %b, %x"
 ; PR2698
 
 declare void @use1(i1)
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index a51c47d..9146a8e 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -scalarrepl -S | not grep { = alloca}
+; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca"
 ; rdar://6417724
 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.
 
diff --git a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
index f56fc38..cb8a77c 100644
--- a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
+++ b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 3679669}
+; RUN: opt < %s -instcombine -S | grep "ret i32 3679669"
 ; PR3595
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
index 0a07bf3..4d47977 100644
--- a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
+++ b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ashr i32 %val, 31}
+; RUN: opt < %s -instcombine -S | grep "ashr i32 %val, 31"
 ; PR3851
 
 define i32 @foo2(i32 %val) nounwind {
diff --git a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
index 244b22a..b79edf6 100644
--- a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
+++ b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {mul i64}
+; RUN: opt < %s -instcombine -S | grep "mul i64"
 ; rdar://6762288
 
 ; Instcombine should not promote the mul to i96 because it is definitely
diff --git a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
index e5355b8..468c1cd 100644
--- a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {store i32 0,}
+; RUN: opt < %s -instcombine -S | grep "store i32 0,"
 ; PR4366
 
 define void @a() {
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index 441d5f9..eb28994 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -5,8 +5,8 @@
 define i32 @main(i32 %argc) nounwind ssp {
 entry:
   %tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3162 = shl i8 %tmp3151, 5
-; CHECK: and i8 %tmp3162, 64
+; CHECK: %tmp3163 = shl i8 %tmp3162, 6
+; CHECK: and i8 %tmp3163, 64
 ; CHECK-NOT: shl
 ; CHECK-NOT: shr
   %tmp3161 = or i8 %tmp3151, -17
@@ -38,8 +38,8 @@ bb:
   %tmp10 = lshr i8 %tmp8, 7
   %tmp11 = shl i8 %tmp10, 5
 
-; CHECK: %0 = lshr i8 %tmp8, 2
-; CHECK: %tmp11 = and i8 %0, 32
+; CHECK: %tmp10 = lshr i8 %tmp8, 7
+; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
 
   %tmp12 = xor i8 %tmp11, %tmp9
   ret i8 %tmp12
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index 2f72b73..fedb46d 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -2,8 +2,10 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0.0"
 
+; CHECK: define void @fu1
 define void @fu1(i32 %parm) nounwind ssp {
   %1 = alloca i32, align 4
+; CHECK: alloca double*
   %ptr = alloca double*, align 4
   store i32 %parm, i32* %1, align 4
   store double* null, double** %ptr, align 4
@@ -16,12 +18,12 @@ define void @fu1(i32 %parm) nounwind ssp {
   %6 = mul nsw i32 %5, 8
 ; With "nsw", the alloca and its bitcast can be fused:
   %7 = add nsw i32 %6, 2048
-; CHECK: alloca double*
+;  CHECK: alloca double
   %8 = alloca i8, i32 %7
   %9 = bitcast i8* %8 to double*
+; CHECK-NEXT: store double*
   store double* %9, double** %ptr, align 4
   br label %10
-
 ; <label>:10                                      ; preds = %4, %0
   %11 = load double** %ptr, align 4
   call void @bar(double* %11)
@@ -31,6 +33,7 @@ define void @fu1(i32 %parm) nounwind ssp {
 
 declare void @bar(double*)
 
+; CHECK: define void @fu2
 define void @fu2(i32 %parm) nounwind ssp {
   %1 = alloca i32, align 4
   %ptr = alloca double*, align 4
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
new file mode 100644
index 0000000..0907c490
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -0,0 +1,68 @@
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0"
+
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMul() nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulS() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulU() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+  %b = add <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  %b = add <4 x i32> %x, %a
+  ret <4 x i32> %b  
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
diff --git a/test/Transforms/InstCombine/2012-04-24-vselect.ll b/test/Transforms/InstCombine/2012-04-24-vselect.ll
new file mode 100644
index 0000000..8d2de2b
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-04-24-vselect.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK: @foo
+; CHECK: <i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+define <8 x i32> @foo() nounwind {
+entry:
+  %v1.i = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>,
+    <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %v1.i
+}
+
diff --git a/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll b/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
new file mode 100644
index 0000000..2ec0a32
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
@@ -0,0 +1,61 @@
+; RUN: opt -inline -instcombine -S < %s
+; PR12967
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define signext i8 @fn1(i32 %p1) nounwind uwtable readnone ssp {
+entry:
+  %shr = lshr i32 1, %p1
+  %conv = trunc i32 %shr to i8
+  ret i8 %conv
+}
+
+define void @fn4() nounwind uwtable ssp {
+entry:
+  %0 = load i32* @d, align 4, !tbaa !0
+  %cmp = icmp eq i32 %0, 0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @c, align 4, !tbaa !0
+  tail call void @fn3(i32 %conv) nounwind
+  ret void
+}
+
+define void @fn3(i32 %p1) nounwind uwtable ssp {
+entry:
+  %and = and i32 %p1, 8
+  store i32 %and, i32* @e, align 4, !tbaa !0
+  %sub = add nsw i32 %and, -1
+  store i32 %sub, i32* @f, align 4, !tbaa !0
+  %0 = load i32* @a, align 4, !tbaa !0
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* @b, align 4, !tbaa !0
+  %.lobit = lshr i32 %1, 31
+  %2 = trunc i32 %.lobit to i8
+  %.not = xor i8 %2, 1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %call = tail call signext i8 @fn1(i32 %sub) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge.in = phi i8 [ %call, %if.else ], [ %.not, %if.then ]
+  %storemerge = sext i8 %storemerge.in to i32
+  store i32 %storemerge, i32* @b, align 4
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/2012-05-28-select-hang.ll b/test/Transforms/InstCombine/2012-05-28-select-hang.ll
new file mode 100644
index 0000000..c580bac
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-05-28-select-hang.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@c = common global i8 0, align 1
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @func() nounwind uwtable ssp {
+entry:
+  %0 = load i8* @c, align 1
+  %conv = zext i8 %0 to i32
+  %or = or i32 %conv, 1
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* @a, align 1
+  %conv2 = zext i8 %conv1 to i32
+  %neg = xor i32 %conv2, -1
+  %and = and i32 1, %neg
+  %conv3 = trunc i32 %and to i8
+  store i8 %conv3, i8* @b, align 1
+  %1 = load i8* @a, align 1
+  %conv4 = zext i8 %1 to i32
+  %conv5 = zext i8 %conv3 to i32
+  %tobool = icmp ne i32 %conv4, 0
+  br i1 %tobool, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %entry
+  %tobool8 = icmp ne i32 %conv5, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %2 = phi i1 [ false, %entry ], [ %tobool8, %land.rhs ]
+  %land.ext = zext i1 %2 to i32
+  %mul = mul nsw i32 3, %land.ext
+  %conv9 = trunc i32 %mul to i8
+  store i8 %conv9, i8* @a, align 1
+  ret void
+
+; CHECK: @func
+; CHECK-NOT: select
+}
diff --git a/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll b/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
new file mode 100644
index 0000000..22466a9
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
@@ -0,0 +1,162 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10889741>
+
+define void @func(double %r, double %g, double %b, double* %outH, double* %outS, double* %outL) nounwind uwtable ssp {
+bb:
+  %tmp = alloca double, align 8
+  %tmp1 = alloca double, align 8
+  %tmp2 = alloca double, align 8
+  store double %r, double* %tmp, align 8
+  store double %g, double* %tmp1, align 8
+  store double %b, double* %tmp2, align 8
+  %tmp3 = fcmp ogt double %r, %g
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb
+  %tmp5 = fcmp ogt double %r, %b
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb4
+  br label %bb12
+
+bb7:                                              ; preds = %bb4
+  br label %bb12
+
+bb8:                                              ; preds = %bb
+  %tmp9 = fcmp ogt double %g, %b
+  br i1 %tmp9, label %bb10, label %bb11
+
+bb10:                                             ; preds = %bb8
+  br label %bb12
+
+bb11:                                             ; preds = %bb8
+  br label %bb12
+
+bb12:                                             ; preds = %bb11, %bb10, %bb7, %bb6
+  %max.0 = phi double* [ %tmp, %bb6 ], [ %tmp2, %bb7 ], [ %tmp1, %bb10 ], [ %tmp2, %bb11 ]
+; CHECK: %tmp13 = load double* %tmp, align 8
+; CHECK: %tmp14 = load double* %tmp1, align 8
+; CHECK: %tmp15 = fcmp olt double %tmp13, %tmp14
+  %tmp13 = load double* %tmp, align 8
+  %tmp14 = load double* %tmp1, align 8
+  %tmp15 = fcmp olt double %tmp13, %tmp14
+  br i1 %tmp15, label %bb16, label %bb21
+
+bb16:                                             ; preds = %bb12
+  %tmp17 = load double* %tmp2, align 8
+  %tmp18 = fcmp olt double %tmp13, %tmp17
+  br i1 %tmp18, label %bb19, label %bb20
+
+bb19:                                             ; preds = %bb16
+  br label %bb26
+
+bb20:                                             ; preds = %bb16
+  br label %bb26
+
+bb21:                                             ; preds = %bb12
+  %tmp22 = load double* %tmp2, align 8
+  %tmp23 = fcmp olt double %tmp14, %tmp22
+  br i1 %tmp23, label %bb24, label %bb25
+
+bb24:                                             ; preds = %bb21
+  br label %bb26
+
+bb25:                                             ; preds = %bb21
+  br label %bb26
+
+bb26:                                             ; preds = %bb25, %bb24, %bb20, %bb19
+  %min.0 = phi double* [ %tmp, %bb19 ], [ %tmp2, %bb20 ], [ %tmp1, %bb24 ], [ %tmp2, %bb25 ]
+; CHECK: %tmp27 = load double* %min.0, align 8
+; CHECK: %tmp28 = load double* %max.0
+; CHECK: %tmp29 = fadd double %tmp27, %tmp28
+  %tmp27 = load double* %min.0, align 8
+  %tmp28 = load double* %max.0
+  %tmp29 = fadd double %tmp27, %tmp28
+  %tmp30 = fdiv double %tmp29, 2.000000e+00
+  store double %tmp30, double* %outL
+  %tmp31 = load double* %min.0
+  %tmp32 = load double* %max.0
+  %tmp33 = fcmp oeq double %tmp31, %tmp32
+  br i1 %tmp33, label %bb34, label %bb35
+
+bb34:                                             ; preds = %bb26
+  store double 0.000000e+00, double* %outS
+  store double 0.000000e+00, double* %outH
+  br label %bb81
+
+bb35:                                             ; preds = %bb26
+  %tmp36 = fcmp olt double %tmp30, 5.000000e-01
+  %tmp37 = fsub double %tmp32, %tmp31
+  br i1 %tmp36, label %bb38, label %bb41
+
+bb38:                                             ; preds = %bb35
+  %tmp39 = fadd double %tmp32, %tmp31
+  %tmp40 = fdiv double %tmp37, %tmp39
+  store double %tmp40, double* %outS
+  br label %bb45
+
+bb41:                                             ; preds = %bb35
+  %tmp42 = fsub double 2.000000e+00, %tmp32
+  %tmp43 = fsub double %tmp42, %tmp31
+  %tmp44 = fdiv double %tmp37, %tmp43
+  store double %tmp44, double* %outS
+  br label %bb45
+
+bb45:                                             ; preds = %bb41, %bb38
+  %tmp46 = icmp eq double* %max.0, %tmp
+  br i1 %tmp46, label %bb47, label %bb55
+
+bb47:                                             ; preds = %bb45
+  %tmp48 = load double* %tmp1, align 8
+  %tmp49 = load double* %tmp2, align 8
+  %tmp50 = fsub double %tmp48, %tmp49
+  %tmp51 = load double* %max.0
+  %tmp52 = load double* %min.0
+  %tmp53 = fsub double %tmp51, %tmp52
+  %tmp54 = fdiv double %tmp50, %tmp53
+  store double %tmp54, double* %outH
+  br label %bb75
+
+bb55:                                             ; preds = %bb45
+  %tmp56 = icmp eq double* %max.0, %tmp1
+  br i1 %tmp56, label %bb57, label %bb66
+
+bb57:                                             ; preds = %bb55
+  %tmp58 = load double* %tmp2, align 8
+  %tmp59 = load double* %tmp, align 8
+  %tmp60 = fsub double %tmp58, %tmp59
+  %tmp61 = load double* %max.0
+  %tmp62 = load double* %min.0
+  %tmp63 = fsub double %tmp61, %tmp62
+  %tmp64 = fdiv double %tmp60, %tmp63
+  %tmp65 = fadd double 2.000000e+00, %tmp64
+  store double %tmp65, double* %outH
+  br label %bb75
+
+bb66:                                             ; preds = %bb55
+  %tmp67 = load double* %tmp, align 8
+  %tmp68 = load double* %tmp1, align 8
+  %tmp69 = fsub double %tmp67, %tmp68
+  %tmp70 = load double* %max.0
+  %tmp71 = load double* %min.0
+  %tmp72 = fsub double %tmp70, %tmp71
+  %tmp73 = fdiv double %tmp69, %tmp72
+  %tmp74 = fadd double 4.000000e+00, %tmp73
+  store double %tmp74, double* %outH
+  br label %bb75
+
+bb75:                                             ; preds = %bb66, %bb57, %bb47
+  %tmp76 = load double* %outH
+  %tmp77 = fdiv double %tmp76, 6.000000e+00
+  store double %tmp77, double* %outH
+  %tmp78 = fcmp olt double %tmp77, 0.000000e+00
+  br i1 %tmp78, label %bb79, label %bb81
+
+bb79:                                             ; preds = %bb75
+  %tmp80 = fadd double %tmp77, 1.000000e+00
+  store double %tmp80, double* %outH
+  br label %bb81
+
+bb81:                                             ; preds = %bb79, %bb75, %bb34
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
new file mode 100644
index 0000000..73e5a66
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR13442
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+
+@test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+define i64 @foo() {
+  %ret = load i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1
+  ret i64 %ret
+  ; CHECK: ret i64 844424930263040
+}
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
new file mode 100644
index 0000000..6f3df5b
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: bitcast
+
+@base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
+declare void @foo(i32*)
+
+define void @test() nounwind {
+  call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll b/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll
new file mode 100644
index 0000000..cb527f8
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: bitcast
+
+define void @foo(<16 x i8> %a, <16 x i8> %b, <4 x i32>* %c) {
+  %aa = bitcast <16 x i8> %a to <4 x i32>
+  %bb = bitcast <16 x i8> %b to <4 x i32>
+  %select_v = select <4 x i1> zeroinitializer, <4 x i32> %aa, <4 x i32> %bb
+  store <4 x i32> %select_v, <4 x i32>* %c, align 4
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/CPP_min_max.ll b/test/Transforms/InstCombine/CPP_min_max.ll
index 531ce2b..b3d081b 100644
--- a/test/Transforms/InstCombine/CPP_min_max.ll
+++ b/test/Transforms/InstCombine/CPP_min_max.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep select | not grep {i32\\*}
+; RUN:   grep select | not grep 'i32\*'
 
 ; This testcase corresponds to PR362, which notices that this horrible code
 ; is generated by the C++ front-end and LLVM optimizers, which has lots of
diff --git a/test/Transforms/InstCombine/JavaCompare.ll b/test/Transforms/InstCombine/JavaCompare.ll
index 46b6c19..8c1f307 100644
--- a/test/Transforms/InstCombine/JavaCompare.ll
+++ b/test/Transforms/InstCombine/JavaCompare.ll
@@ -1,7 +1,7 @@
 ; This is the sequence of stuff that the Java front-end expands for a single 
 ; <= comparison.  Check to make sure we turn it into a <= (only)
 
-; RUN: opt < %s -instcombine -S | grep {icmp sle i32 %A, %B}
+; RUN: opt < %s -instcombine -S | grep "icmp sle i32 %A, %B"
 
 define i1 @le(i32 %A, i32 %B) {
         %c1 = icmp sgt i32 %A, %B               ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll
index cc57478..3edb392 100644
--- a/test/Transforms/InstCombine/add-shrink.ll
+++ b/test/Transforms/InstCombine/add-shrink.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {add nsw i32}
+; RUN: opt < %s -instcombine -S | grep "add nsw i32"
 ; RUN: opt < %s -instcombine -S | grep sext | count 1
 
 ; Should only have one sext and the add should be i32 instead of i64.
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 98a8cb4..40edf71 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {add nsw i32}
+; RUN: opt < %s -instcombine -S | grep "add nsw i32"
 
 define double @x(i32 %a, i32 %b) nounwind {
   %m = lshr i32 %a, 24
diff --git a/test/Transforms/InstCombine/addnegneg.ll b/test/Transforms/InstCombine/addnegneg.ll
index a3a09f2..ad8791d 100644
--- a/test/Transforms/InstCombine/addnegneg.ll
+++ b/test/Transforms/InstCombine/addnegneg.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep { sub } | count 1
+; RUN: opt < %s -instcombine -S | grep " sub " | count 1
 ; PR2047
 
 define i32 @l(i32 %a, i32 %b, i32 %c, i32 %d) {
diff --git a/test/Transforms/InstCombine/adjust-for-sminmax.ll b/test/Transforms/InstCombine/adjust-for-sminmax.ll
index b9b6f70..1fb7193 100644
--- a/test/Transforms/InstCombine/adjust-for-sminmax.ll
+++ b/test/Transforms/InstCombine/adjust-for-sminmax.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {icmp s\[lg\]t i32 %n, 0} | count 16
+; RUN: opt < %s -instcombine -S | grep "icmp s[lg]t i32 %n, 0" | count 16
 
 ; Instcombine should recognize that this code can be adjusted
 ; to fit the canonical smax/smin pattern.
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll
index eeca5c0..5bca46d 100644
--- a/test/Transforms/InstCombine/align-2d-gep.ll
+++ b/test/Transforms/InstCombine/align-2d-gep.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
+; RUN: opt < %s -instcombine -S | grep "align 16" | count 1
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; A multi-dimensional array in a nested loop doing vector stores that
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index ef7185c..50e0347 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -5,8 +5,11 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 declare void @use(...)
 
-; Zero byte allocas should be deleted.
+@int = global i32 zeroinitializer
+
+; Zero byte allocas should be merged if they can't be deleted.
 ; CHECK: @test
+; CHECK: alloca
 ; CHECK-NOT: alloca
 define void @test() {
         %X = alloca [0 x i32]           ; <[0 x i32]*> [#uses=1]
@@ -15,6 +18,9 @@ define void @test() {
         call void (...)* @use( i32* %Y )
         %Z = alloca {  }                ; <{  }*> [#uses=1]
         call void (...)* @use( {  }* %Z )
+        %size = load i32* @int
+        %A = alloca {{}}, i32 %size
+        call void (...)* @use( {{}}* %A )
         ret void
 }
 
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 91868d1..838c2f7 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep fcmp | count 3
-; RUN: opt < %s -instcombine -S | grep ret | grep 0
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define zeroext i8 @t1(float %x, float %y) nounwind {
        %a = fcmp ueq float %x, %y
@@ -7,6 +6,11 @@ define zeroext i8 @t1(float %x, float %y) nounwind {
        %c = and i1 %a, %b
        %retval = zext i1 %c to i8
        ret i8 %retval
+; CHECK: t1
+; CHECK: fcmp oeq float %x, %y
+; CHECK-NOT: fcmp ueq float %x, %y
+; CHECK-NOT: fcmp ord float %x, %y
+; CHECK-NOW: and
 }
 
 define zeroext i8 @t2(float %x, float %y) nounwind {
@@ -15,6 +19,10 @@ define zeroext i8 @t2(float %x, float %y) nounwind {
        %c = and i1 %a, %b
        %retval = zext i1 %c to i8
        ret i8 %retval
+; CHECK: t2
+; CHECK: fcmp olt float %x, %y
+; CHECK-NOT: fcmp ord float %x, %y
+; CHECK-NOT: and
 }
 
 define zeroext i8 @t3(float %x, float %y) nounwind {
@@ -23,6 +31,8 @@ define zeroext i8 @t3(float %x, float %y) nounwind {
        %c = and i1 %a, %b
        %retval = zext i1 %c to i8
        ret i8 %retval
+; CHECK: t3
+; CHECK: ret i8 0
 }
 
 define zeroext i8 @t4(float %x, float %y) nounwind {
@@ -31,4 +41,39 @@ define zeroext i8 @t4(float %x, float %y) nounwind {
        %c = and i1 %a, %b
        %retval = zext i1 %c to i8
        ret i8 %retval
+; CHECK: t4
+; CHECK: fcmp one float %y, %x
+; CHECK-NOT: fcmp ord float %x, %y
+; CHECK-NOT: and
+}
+
+define zeroext i8 @t5(float %x, float %y) nounwind {
+       %a = fcmp ord float %x, %y
+       %b = fcmp uno float %x, %y
+       %c = and i1 %a, %b
+       %retval = zext i1 %c to i8
+       ret i8 %retval
+; CHECK: t5
+; CHECK: ret i8 0
+}
+
+define zeroext i8 @t6(float %x, float %y) nounwind {
+       %a = fcmp uno float %x, %y
+       %b = fcmp ord float %x, %y
+       %c = and i1 %a, %b
+       %retval = zext i1 %c to i8
+       ret i8 %retval
+; CHECK: t6
+; CHECK: ret i8 0
+}
+
+define zeroext i8 @t7(float %x, float %y) nounwind {
+       %a = fcmp uno float %x, %y
+       %b = fcmp ult float %x, %y
+       %c = and i1 %a, %b
+       %retval = zext i1 %c to i8
+       ret i8 %retval
+; CHECK: t7
+; CHECK: fcmp uno
+; CHECK-NOT: fcmp ult
 }
diff --git a/test/Transforms/InstCombine/and-not-or.ll b/test/Transforms/InstCombine/and-not-or.ll
index 9dce7b4..a42140b 100644
--- a/test/Transforms/InstCombine/and-not-or.ll
+++ b/test/Transforms/InstCombine/and-not-or.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {and i32 %x, %y} | count 4
-; RUN: opt < %s -instcombine -S | not grep {or}
+; RUN: opt < %s -instcombine -S | grep "and i32 %x, %y" | count 4
+; RUN: opt < %s -instcombine -S | not grep "or"
 
 define i32 @func1(i32 %x, i32 %y) nounwind {
 entry:
diff --git a/test/Transforms/InstCombine/and-or-and.ll b/test/Transforms/InstCombine/and-or-and.ll
index 216cd46..34cad82 100644
--- a/test/Transforms/InstCombine/and-or-and.ll
+++ b/test/Transforms/InstCombine/and-or-and.ll
@@ -10,7 +10,7 @@
 ; Which corresponds to test1.
 
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   not grep {or }
+; RUN:   not grep "or "
 
 define i32 @test1(i32 %X, i32 %Y) {
         %A = and i32 %X, 7              ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index bd878b0..cc661d5 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | grep xor | count 4
 ; RUN: opt < %s -instcombine -S | not grep and
-; RUN: opt < %s -instcombine -S | not grep { or}
+; RUN: opt < %s -instcombine -S | not grep " or"
 
 ; PR1510
 
diff --git a/test/Transforms/InstCombine/and-or.ll b/test/Transforms/InstCombine/and-or.ll
index b4224b3..0ae12a3 100644
--- a/test/Transforms/InstCombine/and-or.ll
+++ b/test/Transforms/InstCombine/and-or.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {and i32 %a, 1} | count 4
-; RUN: opt < %s -instcombine -S | grep {or i32 %0, %b} | count 4
+; RUN: opt < %s -instcombine -S | grep "and i32 %a, 1" | count 4
+; RUN: opt < %s -instcombine -S | grep "or i32 %0, %b" | count 4
 
 
 define i32 @func1(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/Transforms/InstCombine/and-xor-or.ll b/test/Transforms/InstCombine/and-xor-or.ll
new file mode 100644
index 0000000..7ff810b
--- /dev/null
+++ b/test/Transforms/InstCombine/and-xor-or.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; rdar://10770603
+; (x & y) | (x ^ y) -> x | y 
+define i64 @or(i64 %x, i64 %y) nounwind uwtable readnone ssp {
+  %1 = and i64 %y, %x
+  %2 = xor i64 %y, %x
+  %3 = add i64 %1, %2
+  ret i64 %3
+; CHECK: @or
+; CHECK: or i64
+; CHECK-NEXT: ret
+}
+
+; (x & y) + (x ^ y) -> x | y 
+define i64 @or2(i64 %x, i64 %y) nounwind uwtable readnone ssp {
+  %1 = and i64 %y, %x
+  %2 = xor i64 %y, %x
+  %3 = or i64 %1, %2
+  ret i64 %3
+; CHECK: @or2
+; CHECK: or i64
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/apint-and-or-and.ll b/test/Transforms/InstCombine/apint-and-or-and.ll
index 17d29b6..43536d7 100644
--- a/test/Transforms/InstCombine/apint-and-or-and.ll
+++ b/test/Transforms/InstCombine/apint-and-or-and.ll
@@ -11,7 +11,7 @@
 ; 
 ; This tests arbitrary precision integers.
 
-; RUN: opt < %s -instcombine -S | not grep {or }
+; RUN: opt < %s -instcombine -S | not grep "or "
 ; END.
 
 define i17 @test1(i17 %X, i17 %Y) {
diff --git a/test/Transforms/InstCombine/apint-and1.ll b/test/Transforms/InstCombine/apint-and1.ll
index cd4cbb9..fcd2dcd 100644
--- a/test/Transforms/InstCombine/apint-and1.ll
+++ b/test/Transforms/InstCombine/apint-and1.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that and instructions are properly eliminated.
 ; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
 
-; RUN: opt < %s -instcombine -S | not grep {and }
+; RUN: opt < %s -instcombine -S | not grep "and "
 ; END.
 
 define i39 @test0(i39 %A) {
diff --git a/test/Transforms/InstCombine/apint-and2.ll b/test/Transforms/InstCombine/apint-and2.ll
index ae74472..78dc8f9 100644
--- a/test/Transforms/InstCombine/apint-and2.ll
+++ b/test/Transforms/InstCombine/apint-and2.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that and instructions are properly eliminated.
 ; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
 
-; RUN: opt < %s -instcombine -S | not grep {and }
+; RUN: opt < %s -instcombine -S | not grep "and "
 ; END.
 
 
diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll
index 1a3340a..818ae66 100644
--- a/test/Transforms/InstCombine/apint-shift-simplify.ll
+++ b/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    egrep {shl|lshr|ashr} | count 3
+; RUN:    egrep "shl|lshr|ashr" | count 3
 
 define i41 @test0(i41 %A, i41 %B, i41 %C) {
 	%X = shl i41 %A, %C
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index 0ea73a0..73f630e 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -47,13 +47,21 @@ define i32 @test5a(i32 %A) {
 }
 
 ; CHECK: @test6
-; CHECK-NOT: sh
+; CHECK: mul i55 %A, 6
 define i55 @test6(i55 %A) {
 	%B = shl i55 %A, 1		; <i55> [#uses=1]
 	%C = mul i55 %B, 3		; <i55> [#uses=1]
 	ret i55 %C
 }
 
+; CHECK: @test6a
+; CHECK: mul i55 %A, 6
+define i55 @test6a(i55 %A) {
+	%B = mul i55 %A, 3		; <i55> [#uses=1]
+	%C = shl i55 %B, 1		; <i55> [#uses=1]
+	ret i55 %C
+}
+
 ; CHECK: @test7
 ; CHECK-NOT: sh
 define i29 @test7(i8 %X) {
@@ -87,7 +95,8 @@ define i19 @test10(i19 %A) {
 }
 
 ; CHECK: @test11
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
 define i23 @test11(i23 %A) {
 	%a = mul i23 %A, 3		; <i23> [#uses=1]
 	%B = lshr i23 %a, 11		; <i23> [#uses=1]
@@ -104,7 +113,8 @@ define i47 @test12(i47 %A) {
 }
 
 ; CHECK: @test13
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
 define i18 @test13(i18 %A) {
 	%a = mul i18 %A, 3		; <i18> [#uses=1]
 	%B = ashr i18 %a, 8		; <i18> [#uses=1]
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index 8b9ff14..df8ec52 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -3,7 +3,7 @@
 ;
 
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep -v {sub i19 %Cok, %Bok} | grep -v {sub i25 0, %Aok} | not grep sub
+; RUN:   grep -v "sub i19 %Cok, %Bok" | grep -v "sub i25 0, %Aok" | not grep sub
 ; END.
 
 define i23 @test1(i23 %A) {
diff --git a/test/Transforms/InstCombine/apint-xor1.ll b/test/Transforms/InstCombine/apint-xor1.ll
index 849c659..01cbcf1 100644
--- a/test/Transforms/InstCombine/apint-xor1.ll
+++ b/test/Transforms/InstCombine/apint-xor1.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that xor instructions are properly eliminated.
 ; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
 
-; RUN: opt < %s -instcombine -S | not grep {xor }
+; RUN: opt < %s -instcombine -S | not grep "xor "
 
 
 define i47 @test1(i47 %A, i47 %B) {
diff --git a/test/Transforms/InstCombine/apint-xor2.ll b/test/Transforms/InstCombine/apint-xor2.ll
index cacc179..ab93c92 100644
--- a/test/Transforms/InstCombine/apint-xor2.ll
+++ b/test/Transforms/InstCombine/apint-xor2.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that xor instructions are properly eliminated.
 ; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
 
-; RUN: opt < %s -instcombine -S | not grep {xor }
+; RUN: opt < %s -instcombine -S | not grep "xor "
 ; END.
 
 
diff --git a/test/Transforms/InstCombine/badmalloc.ll b/test/Transforms/InstCombine/badmalloc.ll
index f5a623d..3abe28a 100644
--- a/test/Transforms/InstCombine/badmalloc.ll
+++ b/test/Transforms/InstCombine/badmalloc.ll
@@ -16,5 +16,26 @@ define i1 @test1() {
   ret i1 %B
 
 ; CHECK: @test1
-; CHECK: ret i1 %B
+; CHECK: ret i1 false
+}
+
+; CHECK: @test2
+define noalias i8* @test2() nounwind {
+entry:
+; CHECK: @malloc
+  %A = call noalias i8* @malloc(i64 4) nounwind
+; CHECK: icmp eq
+  %tobool = icmp eq i8* %A, null
+; CHECK: br i1
+  br i1 %tobool, label %return, label %if.end
+
+if.end:
+; CHECK: store
+  store i8 7, i8* %A
+  br label %return
+
+return:
+; CHECK: phi
+  %retval.0 = phi i8* [ %A, %if.end ], [ null, %entry ]
+  ret i8* %retval.0
 }
diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll
index 79a096f..62c9ddc 100644
--- a/test/Transforms/InstCombine/bit-checks.ll
+++ b/test/Transforms/InstCombine/bit-checks.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep {tobool}
+; RUN:    not grep "tobool"
 ; END.
 define i32 @main(i32 %argc, i8** %argv) nounwind ssp {
 entry:
diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll
index a6fd837..318ca73 100644
--- a/test/Transforms/InstCombine/bitcount.ll
+++ b/test/Transforms/InstCombine/bitcount.ll
@@ -1,5 +1,5 @@
 ; Tests to make sure bit counts of constants are folded
-; RUN: opt < %s -instcombine -S | grep {ret i32 19}
+; RUN: opt < %s -instcombine -S | grep "ret i32 19"
 ; RUN: opt < %s -instcombine -S | \
 ; RUN:   grep -v declare | not grep llvm.ct
 
diff --git a/test/Transforms/InstCombine/bittest.ll b/test/Transforms/InstCombine/bittest.ll
index 92863d5..84ee7dd 100644
--- a/test/Transforms/InstCombine/bittest.ll
+++ b/test/Transforms/InstCombine/bittest.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -simplifycfg -S |\
-; RUN:    not grep {call void @abort}
+; RUN:    not grep "call void @abort"
 
 @b_rec.0 = external global i32          ; <i32*> [#uses=2]
 
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index 168b3e8..ba7df31 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,7 +1,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {call.*llvm.bswap} | count 6
+; RUN:    grep "call.*llvm.bswap" | count 6
 
 define i32 @test1(i32 %i) {
 	%tmp1 = lshr i32 %i, 24		; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 19d5a0a..56e5ca3 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -457,10 +457,12 @@ define i64 @test50(i64 %A) {
   %E = sext i32 %D to i64
   ret i64 %E
 ; CHECK: @test50
-; CHECK-NEXT: shl i64 %A, 30
+; lshr+shl will be handled by DAGCombine.
+; CHECK-NEXT: lshr i64 %A, 2
+; CHECK-NEXT: shl i64 %a, 32
 ; CHECK-NEXT: add i64 {{.*}}, -4294967296
-; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
-; CHECK-NEXT: ret i64 %sext
+; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
+; CHECK-NEXT: ret i64 %E
 }
 
 define i64 @test51(i64 %A, i1 %cond) {
@@ -677,3 +679,18 @@ define i64 @test_mmx_const(<2 x i32> %c) nounwind {
 ; CHECK: @test_mmx_const
 ; CHECK-NOT: x86_mmx
 }
+
+; PR12514
+define i1 @test67(i1 %a, i32 %b) {
+  %tmp2 = zext i1 %a to i32
+  %conv6 = xor i32 %tmp2, 1
+  %and = and i32 %b, %conv6
+  %sext = shl nuw nsw i32 %and, 24
+  %neg.i = xor i32 %sext, -16777216
+  %conv.i.i = ashr exact i32 %neg.i, 24
+  %trunc = trunc i32 %conv.i.i to i8
+  %tobool.i = icmp eq i8 %trunc, 0
+  ret i1 %tobool.i
+; CHECK: @test67
+; CHECK: ret i1 false
+}
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index d5af532..2ef6ac6 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -132,12 +132,14 @@ define i32 @test5a() {
 }
 
 define void @test5() {
-       store i1 true, i1* undef
-       %1 = invoke i32 @test5a() to label %exit unwind label %exit
+  store i1 true, i1* undef
+  %r = invoke i32 @test5a() to label %exit unwind label %unwind
+unwind:
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+          cleanup
+  br label %exit
 exit:
-       %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
-                cleanup
-       ret void
+  ret void
 }
 
 
diff --git a/test/Transforms/InstCombine/dce-iterate.ll b/test/Transforms/InstCombine/dce-iterate.ll
index 1d2cc53..1dd4522 100644
--- a/test/Transforms/InstCombine/dce-iterate.ll
+++ b/test/Transforms/InstCombine/dce-iterate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret double .sy}
+; RUN: opt < %s -instcombine -S | grep "ret double .sy"
 
 define internal double @ScaleObjectAdd(double %sx, double %sy, double %sz) nounwind {
 entry:
diff --git a/test/Transforms/InstCombine/deadcode.ll b/test/Transforms/InstCombine/deadcode.ll
index 7c7f1ab..8fe673d 100644
--- a/test/Transforms/InstCombine/deadcode.ll
+++ b/test/Transforms/InstCombine/deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 %A}
+; RUN: opt < %s -instcombine -S | grep "ret i32 %A"
 ; RUN: opt < %s -die -S | not grep call.*llvm
 
 define i32 @test(i32 %A) {
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
new file mode 100644
index 0000000..a07f3ea
--- /dev/null
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @t1(i16 zeroext %x, i32 %y) nounwind {
+entry:
+; CHECK: t1
+; CHECK-NOT: sdiv
+; CHECK: lshr i32 %conv
+  %conv = zext i16 %x to i32
+  %s = shl i32 2, %y
+  %d = sdiv i32 %conv, %s
+  ret i32 %d
+}
+
+; rdar://11721329
+define i64 @t2(i64 %x, i32 %y) nounwind  {
+; CHECK: t2
+; CHECK-NOT: udiv
+; CHECK: lshr i64 %x
+  %1 = shl i32 1, %y
+  %2 = zext i32 %1 to i64
+  %3 = udiv i64 %x, %2
+  ret i64 %3
+}
diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll
index 9e9be7f..6645d99 100644
--- a/test/Transforms/InstCombine/enforce-known-alignment.ll
+++ b/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep alloca | grep {align 16}
+; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 
diff --git a/test/Transforms/InstCombine/fp-ret-bitcast.ll b/test/Transforms/InstCombine/fp-ret-bitcast.ll
index 35ece42..b2fbc0b 100644
--- a/test/Transforms/InstCombine/fp-ret-bitcast.ll
+++ b/test/Transforms/InstCombine/fp-ret-bitcast.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {call float bitcast} | count 1
+; RUN:    grep "call float bitcast" | count 1
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 	%struct.NSObject = type { %struct.objc_class* }
  	%struct.NSArray = type { %struct.NSObject }
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index a9ae221..eaff87d 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -637,3 +637,25 @@ define i1 @test62(i8* %a) {
 ; CHECK: @test62
 ; CHECK-NEXT: ret i1 true
 }
+
+define i1 @test63(i8 %a, i32 %b) nounwind {
+  %z = zext i8 %a to i32
+  %t = and i32 %b, 255
+  %c = icmp eq i32 %z, %t
+  ret i1 %c
+; CHECK: @test63
+; CHECK-NEXT: %1 = trunc i32 %b to i8
+; CHECK-NEXT: %c = icmp eq i8 %1, %a
+; CHECK-NEXT: ret i1 %c
+}
+
+define i1 @test64(i8 %a, i32 %b) nounwind {
+  %t = and i32 %b, 255
+  %z = zext i8 %a to i32
+  %c = icmp eq i32 %t, %z
+  ret i1 %c
+; CHECK: @test64
+; CHECK-NEXT: %1 = trunc i32 %b to i8
+; CHECK-NEXT: %c = icmp eq i8 %1, %a
+; CHECK-NEXT: ret i1 %c
+}
diff --git a/test/Transforms/InstCombine/invoke.ll b/test/Transforms/InstCombine/invoke.ll
new file mode 100644
index 0000000..04eaf86
--- /dev/null
+++ b/test/Transforms/InstCombine/invoke.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+declare i8* @_Znwm(i64)
+
+
+; CHECK: @f1
+define i64 @f1() nounwind uwtable ssp {
+entry:
+; CHECK: nvoke noalias i8* undef()
+  %call = invoke noalias i8* undef()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 0
+  %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+  ret i64 %0
+
+lpad:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %2 = extractvalue { i8*, i32 } %1, 0
+  tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f2
+define i64 @f2() nounwind uwtable ssp {
+entry:
+; CHECK: nvoke noalias i8* null()
+  %call = invoke noalias i8* null()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 0
+  %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+  ret i64 %0
+
+lpad:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %2 = extractvalue { i8*, i32 } %1, 0
+  tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f3
+define void @f3() nounwind uwtable ssp {
+; CHECK: invoke void @llvm.donothing()
+  %call = invoke noalias i8* @_Znwm(i64 13)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret void
+
+lpad:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %2 = extractvalue { i8*, i32 } %1, 0
+  tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+  unreachable
+}
diff --git a/test/Transforms/InstCombine/known_align.ll b/test/Transforms/InstCombine/known_align.ll
index 5382abf..0249951 100644
--- a/test/Transforms/InstCombine/known_align.ll
+++ b/test/Transforms/InstCombine/known_align.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {align 1}
+; RUN: opt < %s -instcombine -S | grep "align 1"
 ; END.
 
 	%struct.p = type <{ i8, i32 }>
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index 1d932d2..2263cb2 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {, align 16} | count 14
+; RUN: opt < %s -instcombine -S | grep ", align 16" | count 14
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @x = external global <2 x i64>, align 16
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index eae973d..4e3217d 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -1,17 +1,17 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1201
 define i32 @main(i32 %argc, i8** %argv) {
+; CHECK: @main
     %c_19 = alloca i8*
     %malloc_206 = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8* null, i32 1) to i32), i32 10))
     store i8* %malloc_206, i8** %c_19
     %tmp_207 = load i8** %c_19
     tail call void @free(i8* %tmp_207)
     ret i32 0
-; CHECK-NOT: malloc
-; CHECK-NOT: free
-; CHECK: ret i32 0
+; CHECK-NEXT: ret i32 0
 }
 
+declare noalias i8* @calloc(i32, i32) nounwind
 declare noalias i8* @malloc(i32)
 declare void @free(i8*)
 
@@ -26,13 +26,24 @@ define i1 @foo() {
 
 declare void @llvm.lifetime.start(i64, i8*)
 declare void @llvm.lifetime.end(i64, i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
 
-define void @test3() {
+define void @test3(i8* %src) {
 ; CHECK: @test3
 ; CHECK-NEXT: ret void
   %a = call noalias i8* @malloc(i32 10)
   call void @llvm.lifetime.start(i64 10, i8* %a)
   call void @llvm.lifetime.end(i64 10, i8* %a)
+  %size = call i64 @llvm.objectsize.i64(i8* %a, i1 true)
+  store i8 42, i8* %a
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i32 1, i1 false)
+  %alloc2 = call noalias i8* @calloc(i32 5, i32 7) nounwind
+  %z = icmp ne i8* %alloc2, null
   ret void
 }
 
@@ -46,3 +57,37 @@ define void @test4() {
   call void @free(i8* %C)
   ret void
 }
+
+; CHECK: @test5
+define void @test5(i8* %ptr, i8** %esc) {
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: call void @llvm.memmove
+; CHECK-NEXT: store
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: call void @llvm.memmove
+; CHECK-NEXT: call void @llvm.memset
+; CHECK-NEXT: store volatile
+; CHECK-NEXT: ret
+  %a = call i8* @malloc(i32 700)
+  %b = call i8* @malloc(i32 700)
+  %c = call i8* @malloc(i32 700)
+  %d = call i8* @malloc(i32 700)
+  %e = call i8* @malloc(i32 700)
+  %f = call i8* @malloc(i32 700)
+  %g = call i8* @malloc(i32 700)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i32 1, i1 false)
+  store i8* %c, i8** %esc
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i32 1, i1 true)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i32 1, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i32 1, i1 true)
+  store volatile i8 4, i8* %g
+  ret void
+}
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index 04aac98..bcc9e18 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {load double}
+; RUN: opt < %s -instcombine -S | grep "load double"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
 
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 4602c12..9d51ea0 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,6 +1,6 @@
 ; This test makes sure that memmove instructions are properly eliminated.
 ;
-; RUN: opt < %s -instcombine -S | not grep {call void @llvm.memmove}
+; RUN: opt < %s -instcombine -S | not grep "call void @llvm.memmove"
 
 @S = internal constant [33 x i8] c"panic: restorelist inconsistency\00"		; <[33 x i8]*> [#uses=1]
 @h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index 7f7bc9f..7f02dad 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset}
+; RUN: opt < %s -instcombine -S | not grep "call.*llvm.memset"
 
 define i32 @main() {
   %target = alloca [1024 x i8]
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index edb5305..6c8e634 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -138,8 +138,9 @@ define i32 @test16(i32 %b, i1 %c) {
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
-; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
-; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK: [[TEST16:%.*]] = zext i1 %c to i32
+; CHECK-NEXT: %1 = sub i32 0, [[TEST16]]
+; CHECK-NEXT: %e = and i32 %1, %b
 ; CHECK-NEXT: ret i32 %e
 }
 
diff --git a/test/Transforms/InstCombine/multi-use-or.ll b/test/Transforms/InstCombine/multi-use-or.ll
index 8c6a0e0..8b90e0d 100644
--- a/test/Transforms/InstCombine/multi-use-or.ll
+++ b/test/Transforms/InstCombine/multi-use-or.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {fadd double .sx, .sy}
+; RUN: opt < %s -instcombine -S | grep "fadd double .sx, .sy"
 ; The 'or' has multiple uses, make sure that this doesn't prevent instcombine
 ; from propagating the extends to the truncs.
 
diff --git a/test/Transforms/InstCombine/narrow.ll b/test/Transforms/InstCombine/narrow.ll
index 1b96a06..5dd13a0 100644
--- a/test/Transforms/InstCombine/narrow.ll
+++ b/test/Transforms/InstCombine/narrow.ll
@@ -1,7 +1,7 @@
 ; This file contains various testcases that check to see that instcombine
 ; is narrowing computations when possible.
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {ret i1 false}
+; RUN:    grep "ret i1 false"
 
 ; test1 - Eliminating the casts in this testcase (by narrowing the AND
 ; operation) allows instcombine to realize the function always returns false.
diff --git a/test/Transforms/InstCombine/objsize-64.ll b/test/Transforms/InstCombine/objsize-64.ll
new file mode 100644
index 0000000..530e123
--- /dev/null
+++ b/test/Transforms/InstCombine/objsize-64.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare noalias i8* @malloc(i32) nounwind
+declare noalias i8* @_Znwm(i64)  ; new(unsigned long)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+
+; CHECK: @f1
+define i64 @f1(i8 **%esc) {
+  %call = call i8* @malloc(i32 4)
+  store i8* %call, i8** %esc
+  %size = call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+; CHECK: ret i64 4
+  ret i64 %size
+}
+
+
+; CHECK: @f2
+define i64 @f2(i8** %esc) nounwind uwtable ssp {
+entry:
+; CHECK: invoke noalias i8* @_Znwm(i64 13)
+  %call = invoke noalias i8* @_Znwm(i64 13)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 13
+  store i8* %call, i8** %esc
+  %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+  ret i64 %0
+
+lpad:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %2 = extractvalue { i8*, i32 } %1, 0
+  tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+  unreachable
+}
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 28ceb68..dbb0ffc 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -42,7 +42,7 @@ define i32 @f() nounwind {
 
 define i1 @baz() nounwind {
 ; CHECK: @baz
-; CHECK-NEXT: ret i1 true
+; CHECK-NEXT: objectsize
   %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
   %2 = icmp eq i32 %1, -1
   ret i1 %2
@@ -106,7 +106,7 @@ bb12:
 
 %struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
 
-define i32 @test4() nounwind ssp {
+define i32 @test4(i8** %esc) nounwind ssp {
 ; CHECK: @test4
 entry:
   %0 = alloca %struct.data, align 8
@@ -115,13 +115,14 @@ entry:
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
+  store i8* %1, i8** %esc
   ret i32 0
 }
 
 ; rdar://7782496
 @s = external global i8*
 
-define void @test5(i32 %n) nounwind ssp {
+define i8* @test5(i32 %n) nounwind ssp {
 ; CHECK: @test5
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
@@ -130,7 +131,7 @@ entry:
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
-  ret void
+  ret i8* %0
 }
 
 define void @test6(i32 %n) nounwind ssp {
@@ -149,12 +150,91 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 
 declare noalias i8* @malloc(i32) nounwind
 
-define i32 @test7() {
+define i32 @test7(i8** %esc) {
 ; CHECK: @test7
   %alloc = call noalias i8* @malloc(i32 48) nounwind
+  store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 16
   %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
-; CHECK-NEXT: ret i32 32
+; CHECK: ret i32 32
+  ret i32 %objsize
+}
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define i32 @test8(i8** %esc) {
+; CHECK: @test8
+  %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
+  store i8* %alloc, i8** %esc
+  %gep = getelementptr inbounds i8* %alloc, i32 5
+  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+; CHECK: ret i32 30
   ret i32 %objsize
 }
 
+declare noalias i8* @strdup(i8* nocapture) nounwind
+declare noalias i8* @strndup(i8* nocapture, i32) nounwind
+
+; CHECK: @test9
+define i32 @test9(i8** %esc) {
+  %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind
+  store i8* %call, i8** %esc, align 8
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+; CHECK: ret i32 8
+  ret i32 %1
+}
+
+; CHECK: @test10
+define i32 @test10(i8** %esc) {
+  %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
+  store i8* %call, i8** %esc, align 8
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+; CHECK: ret i32 4
+  ret i32 %1
+}
+
+; CHECK: @test11
+define i32 @test11(i8** %esc) {
+  %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
+  store i8* %call, i8** %esc, align 8
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+; CHECK: ret i32 8
+  ret i32 %1
+}
+
+; CHECK: @test12
+define i32 @test12(i8** %esc) {
+  %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
+  store i8* %call, i8** %esc, align 8
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+; CHECK: ret i32 8
+  ret i32 %1
+}
+
+; CHECK: @test13
+define i32 @test13(i8** %esc) {
+  %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
+  store i8* %call, i8** %esc, align 8
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+; CHECK: ret i32 8
+  ret i32 %1
+}
+
+; CHECK: @PR13390
+define i32 @PR13390(i1 %bool, i8* %a) {
+entry:
+  %cond = or i1 %bool, true
+  br i1 %cond, label %return, label %xpto
+
+xpto:
+  %select = select i1 %bool, i8* %select, i8* %a
+  %select2 = select i1 %bool, i8* %a, i8* %select2
+  %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 true)
+  %2 = add i32 %0, %1
+; CHECK: ret i32 undef
+  ret i32 %2
+
+return:
+  ret i32 42
+}
diff --git a/test/Transforms/InstCombine/odr-linkage.ll b/test/Transforms/InstCombine/odr-linkage.ll
index 61365b4..2ce6246 100644
--- a/test/Transforms/InstCombine/odr-linkage.ll
+++ b/test/Transforms/InstCombine/odr-linkage.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 10}
+; RUN: opt < %s -instcombine -S | grep "ret i32 10"
 
 @g1 = available_externally constant i32 1
 @g2 = linkonce_odr constant i32 2
diff --git a/test/Transforms/InstCombine/or-to-xor.ll b/test/Transforms/InstCombine/or-to-xor.ll
index 1495ee4..8847cb7 100644
--- a/test/Transforms/InstCombine/or-to-xor.ll
+++ b/test/Transforms/InstCombine/or-to-xor.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {xor i32 %a, %b} | count 4
-; RUN: opt < %s -instcombine -S | not grep {and}
+; RUN: opt < %s -instcombine -S | grep "xor i32 %a, %b" | count 4
+; RUN: opt < %s -instcombine -S | not grep "and"
 
 define i32 @func1(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/Transforms/InstCombine/phi-merge-gep.ll b/test/Transforms/InstCombine/phi-merge-gep.ll
index 2671749..25c9cea 100644
--- a/test/Transforms/InstCombine/phi-merge-gep.ll
+++ b/test/Transforms/InstCombine/phi-merge-gep.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -S -instcombine > %t
-; RUN: grep {= getelementptr} %t | count 20
-; RUN: grep {= phi} %t | count 13
+; RUN: grep "= getelementptr" %t | count 20
+; RUN: grep "= phi" %t | count 13
 
 ; Don't push the geps through these phis, because they would require
 ; two phis each, which burdens the loop with high register pressure.
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 219545c..1c307d4 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -620,3 +620,13 @@ end:
 ; CHECK-NOT: phi i32
 ; CHECK: ret i1 %z
 }
+
+; CHECK: @test27(
+; CHECK: ret i32 undef
+define i32 @test27(i1 %b) {
+entry:
+  br label %done
+done:
+  %y = phi i32 [ undef, %entry ]
+  ret i32 %y
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
new file mode 100644
index 0000000..2b5c8f8
--- /dev/null
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @entry() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:
+  %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
+; CHECK: sub <1 x i32> <i32 92>, %local
+  %phi3 = sub <1 x i32> zeroinitializer, %local
+  br label %cond.end
+
+cond.false:
+  br label %cond.end
+
+cond.end:
+  %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
+  br label %cond.end47
+
+cond.end47:
+  %sum = add <1 x i32> %cond, <i32 92>
+  %phi2 = sub <1 x i32> zeroinitializer, %sum
+  br label %for.cond
+}
diff --git a/test/Transforms/InstCombine/pr2645-0.ll b/test/Transforms/InstCombine/pr2645-0.ll
index 9bcaa43..e8aeb2a 100644
--- a/test/Transforms/InstCombine/pr2645-0.ll
+++ b/test/Transforms/InstCombine/pr2645-0.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {insertelement <4 x float> undef}
+; RUN: opt < %s -instcombine -S | grep "insertelement <4 x float> undef"
 
 ; Instcombine should be able to prove that none of the
 ; insertelement's first operand's elements are needed.
diff --git a/test/Transforms/InstCombine/sdiv-shift.ll b/test/Transforms/InstCombine/sdiv-shift.ll
deleted file mode 100644
index f4d2b36..0000000
--- a/test/Transforms/InstCombine/sdiv-shift.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep div
-
-define i32 @a(i16 zeroext %x, i32 %y) nounwind {
-entry:
-	%conv = zext i16 %x to i32
-	%s = shl i32 2, %y
-	%d = sdiv i32 %conv, %s
-	ret i32 %d
-}
diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll
index 18af152..946ea2b 100644
--- a/test/Transforms/InstCombine/select-crash.ll
+++ b/test/Transforms/InstCombine/select-crash.ll
@@ -30,3 +30,20 @@ define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
   %sel = select i1 %b, <4 x float> %a, <4 x float> %sub 
   ret <4 x float> %sel
 }
+
+; CHECK: @test3
+define i32 @test3(i1 %bool, i32 %a) {
+entry:
+  %cond = or i1 %bool, true
+  br i1 %cond, label %return, label %xpto
+
+; technically reachable, but this malformed IR may appear as a result of constant propagation
+xpto:
+  %select = select i1 %bool, i32 %a, i32 %select
+  %select2 = select i1 %bool, i32 %select2, i32 %a
+  %sum = add i32 %select, %select2
+  ret i32 %sum
+
+return:
+  ret i32 7
+}
diff --git a/test/Transforms/InstCombine/select-load-call.ll b/test/Transforms/InstCombine/select-load-call.ll
index bef0cf8..b63468d 100644
--- a/test/Transforms/InstCombine/select-load-call.ll
+++ b/test/Transforms/InstCombine/select-load-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 1}
+; RUN: opt < %s -instcombine -S | grep "ret i32 1"
 
 declare void @test2()
 
diff --git a/test/Transforms/InstCombine/setcc-strength-reduce.ll b/test/Transforms/InstCombine/setcc-strength-reduce.ll
index 62ab116..138712e 100644
--- a/test/Transforms/InstCombine/setcc-strength-reduce.ll
+++ b/test/Transforms/InstCombine/setcc-strength-reduce.ll
@@ -3,7 +3,7 @@
 ; into equivalent setne,eq instructions.
 ;
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep -v {icmp eq} | grep -v {icmp ne} | not grep icmp
+; RUN:    grep -v "icmp eq" | grep -v "icmp ne" | not grep icmp
 ; END.
 
 define i1 @test1(i32 %A) {
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 52310e3..25e708b 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -65,8 +65,17 @@ define i32 @test6(i32 %A) {
 ; CHECK: @test6
 ; CHECK-NEXT: mul i32 %A, 6
 ; CHECK-NEXT: ret i32
-        %B = shl i32 %A, 1      ;; convert to an mul instruction 
-        %C = mul i32 %B, 3             
+        %B = shl i32 %A, 1      ;; convert to an mul instruction
+        %C = mul i32 %B, 3
+        ret i32 %C
+}
+
+define i32 @test6a(i32 %A) {
+; CHECK: @test6a
+; CHECK-NEXT: mul i32 %A, 6
+; CHECK-NEXT: ret i32
+        %B = mul i32 %A, 3
+        %C = shl i32 %B, 1      ;; convert to an mul instruction
         ret i32 %C
 }
 
@@ -97,7 +106,9 @@ define i8 @test9(i8 %A) {
         ret i8 %C
 }
 
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 7) << 7 === A & 128
+;; The shl may be valuable to scalar evolution.
 define i8 @test10(i8 %A) {
 ; CHECK: @test10
 ; CHECK-NEXT: and i8 %A, -128
@@ -107,11 +118,21 @@ define i8 @test10(i8 %A) {
         ret i8 %C
 }
 
+;; Allow the simplification when the lshr shift is exact.
+define i8 @test10a(i8 %A) {
+; CHECK: @test10a
+; CHECK-NEXT: ret i8 %A
+        %B = lshr exact i8 %A, 7
+        %C = shl i8 %B, 7
+        ret i8 %C
+}
+
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & 0x1F) << 1
+;; The shl may be valuable to scalar evolution.
 define i8 @test11(i8 %A) {
 ; CHECK: @test11
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
         %B = lshr i8 %a, 3              ; <i8> [#uses=1]
@@ -119,6 +140,18 @@ define i8 @test11(i8 %A) {
         ret i8 %C
 }
 
+;; Allow the simplification in InstCombine when the lshr shift is exact.
+define i8 @test11a(i8 %A) {
+; CHECK: @test11a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = lshr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
+;; This is deferred to DAGCombine unless %B is single-use.
 ;; (A >> 8) << 8 === A & -256
 define i32 @test12(i32 %A) {
 ; CHECK: @test12
@@ -129,11 +162,12 @@ define i32 @test12(i32 %A) {
         ret i32 %C
 }
 
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & -8) * 2
+;; The shl may be valuable to scalar evolution.
 define i8 @test13(i8 %A) {
 ; CHECK: @test13
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
         %B = ashr i8 %a, 3              ; <i8> [#uses=1]
@@ -141,6 +175,16 @@ define i8 @test13(i8 %A) {
         ret i8 %C
 }
 
+define i8 @test13a(i8 %A) {
+; CHECK: @test13a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = ashr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
 ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
 define i32 @test14(i32 %A) {
 ; CHECK: @test14
@@ -477,10 +521,11 @@ entry:
   %tmp49 = lshr i8 %tmp48, 5
   %tmp50 = mul i8 %tmp49, 64
   %tmp51 = xor i8 %tmp50, %tmp5
-; CHECK: and i8 %0, 16
   %tmp52 = and i8 %tmp51, -128
   %tmp53 = lshr i8 %tmp52, 7
+; CHECK: lshr i8 %tmp51, 7
   %tmp54 = mul i8 %tmp53, 16
+; CHECK: shl nuw nsw i8 %tmp53, 4
   %tmp55 = xor i8 %tmp54, %tmp51
 ; CHECK: ret i8 %tmp551
   ret i8 %tmp55
diff --git a/test/Transforms/InstCombine/shufflemask-undef.ll b/test/Transforms/InstCombine/shufflemask-undef.ll
index cf87aef..aa6baa9 100644
--- a/test/Transforms/InstCombine/shufflemask-undef.ll
+++ b/test/Transforms/InstCombine/shufflemask-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep {shufflevector.\*i32 8}
+; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/InstCombine/shufflevec-constant.ll b/test/Transforms/InstCombine/shufflevec-constant.ll
index 29ae5a7..a002b2a 100644
--- a/test/Transforms/InstCombine/shufflevec-constant.ll
+++ b/test/Transforms/InstCombine/shufflevec-constant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>}
+; RUN: opt < %s -instcombine -S | grep "ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll
index 9a08c64..ab0e7e7 100644
--- a/test/Transforms/InstCombine/signed-comparison.ll
+++ b/test/Transforms/InstCombine/signed-comparison.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S > %t
 ; RUN: not grep zext %t
 ; RUN: not grep slt %t
-; RUN: grep {icmp ult} %t
+; RUN: grep "icmp ult" %t
 
 ; Instcombine should convert the zext+slt into a simple ult.
 
diff --git a/test/Transforms/InstCombine/srem-simplify-bug.ll b/test/Transforms/InstCombine/srem-simplify-bug.ll
index af824a4..3458714 100644
--- a/test/Transforms/InstCombine/srem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/srem-simplify-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
 ; PR2276
 
 define i1 @f(i32 %x) {
diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll
index 2fc8414..80c2ee8 100644
--- a/test/Transforms/InstCombine/stack-overalign.ll
+++ b/test/Transforms/InstCombine/stack-overalign.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {align 32} | count 1
+; RUN: opt < %s -instcombine -S | grep "align 32" | count 1
 
 ; It's tempting to have an instcombine in which the src pointer of a
 ; memcpy is aligned up to the alignment of the destination, however
diff --git a/test/Transforms/InstCombine/stacksaverestore.ll b/test/Transforms/InstCombine/stacksaverestore.ll
index 0fcaefa..f5c7a6f 100644
--- a/test/Transforms/InstCombine/stacksaverestore.ll
+++ b/test/Transforms/InstCombine/stacksaverestore.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {call.*stackrestore} | count 1
+; RUN: opt < %s -instcombine -S | grep "call.*stackrestore" | count 1
 
 declare i8* @llvm.stacksave()
 declare void @llvm.stackrestore(i8*)
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index 6ec342a..cbbad7f 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -12,8 +12,8 @@ define i64 @test1(i64 %a) {
   call void @use(i32 %b)
   ret i64 %d
 ; CHECK: @test1
-; CHECK: %d = and i64 %a, 15
-; CHECK: ret i64 %d
+; CHECK-NOT: ext
+; CHECK: ret
 }
 define i64 @test2(i64 %a) {
   %b = trunc i64 %a to i32
@@ -34,8 +34,8 @@ define i64 @test3(i64 %a) {
   call void @use(i32 %b)
   ret i64 %d
 ; CHECK: @test3
-; CHECK: %d = and i64 %a, 8
-; CHECK: ret i64 %d
+; CHECK-NOT: ext
+; CHECK: ret
 }
 define i64 @test4(i64 %a) {
   %b = trunc i64 %a to i32
@@ -46,8 +46,9 @@ define i64 @test4(i64 %a) {
   ret i64 %d
 ; CHECK: @test4
 ; CHECK: = and i64 %a, 8
-; CHECK: %d = xor i64 {{.*}}, 8
-; CHECK: ret i64 %d
+; CHECK: = xor i64 {{.*}}, 8
+; CHECK-NOT: ext
+; CHECK: ret
 }
 
 define i32 @test5(i32 %A) {
diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
index bfdd98c..064e721 100644
--- a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
+++ b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret i64 0} | count 2
+; RUN: opt < %s -instcombine -S | grep "ret i64 0" | count 2
 
 define i64 @foo(i32 %x) nounwind {
   %y = lshr i32 %x, 1
diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll
index 229f1a8..3e94ab5 100644
--- a/test/Transforms/InstCombine/urem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/urem-simplify-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5}
+; RUN: opt < %s -instcombine -S | grep "= or i32 %x, -5"
 
 @.str = internal constant [5 x i8] c"foo\0A\00"		; <[5 x i8]*> [#uses=1]
 @.str1 = internal constant [5 x i8] c"bar\0A\00"		; <[5 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index cc63371..0019a57 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -162,4 +162,51 @@ entry:
   ret <4 x float> %shuffle9.i
 }
 
+define <2 x float> @test_fptrunc(double %f) {
+; CHECK: @test_fptrunc
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK-NOT: insertelement
+  %tmp9 = insertelement <4 x double> undef, double %f, i32 0
+  %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
+  %tmp11 = insertelement <4 x double> %tmp10, double 0.000000e+00, i32 2
+  %tmp12 = insertelement <4 x double> %tmp11, double 0.000000e+00, i32 3
+  %tmp5 = fptrunc <4 x double> %tmp12 to <4 x float>
+  %ret = shufflevector <4 x float> %tmp5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %ret
+}
+
+define <2 x double> @test_fpext(float %f) {
+; CHECK: @test_fpext
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK-NOT: insertelement
+  %tmp9 = insertelement <4 x float> undef, float %f, i32 0
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
+  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+  %tmp5 = fpext <4 x float> %tmp12 to <4 x double>
+  %ret = shufflevector <4 x double> %tmp5, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %ret
+}
+
+define <4 x float> @test_select(float %f, float %g) {
+; CHECK: @test_select
+; CHECK: %a0 = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NOT: insertelement
+; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
+; CHECK-NOT: insertelement
+; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+  %a0 = insertelement <4 x float> undef, float %f, i32 0
+  %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
+  %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
+  %a3 = insertelement <4 x float> %a2, float 3.000000e+00, i32 3
+  %b0 = insertelement <4 x float> undef, float %g, i32 0
+  %b1 = insertelement <4 x float> %b0, float 4.000000e+00, i32 1
+  %b2 = insertelement <4 x float> %b1, float 5.000000e+00, i32 2
+  %b3 = insertelement <4 x float> %b2, float 6.000000e+00, i32 3
+  %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %b3
+  ret <4 x float> %ret
+}
+
 
diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll
index eedf882..e35fa5e 100644
--- a/test/Transforms/InstCombine/vec_insertelt.ll
+++ b/test/Transforms/InstCombine/vec_insertelt.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {ret <4 x i32> %A}
+; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A"
 
 ; PR1286
 define <4 x i32> @test1(<4 x i32> %A) {
diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll
index 2be43599..b4c41f6 100644
--- a/test/Transforms/InstCombine/vec_narrow.ll
+++ b/test/Transforms/InstCombine/vec_narrow.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {fadd float}
+; RUN: opt < %s -instcombine -S | grep "fadd float"
 
 
 define float @test(<4 x float> %A, <4 x float> %B, float %f) {
diff --git a/test/Transforms/InstCombine/vector-srem.ll b/test/Transforms/InstCombine/vector-srem.ll
index acb11c5..b1ed49e 100644
--- a/test/Transforms/InstCombine/vector-srem.ll
+++ b/test/Transforms/InstCombine/vector-srem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {srem <4 x i32>}
+; RUN: opt < %s -instcombine -S | grep "srem <4 x i32>"
 
 define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u)
 {
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 2256678..7cab199 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep {store volatile}
-; RUN: opt < %s -instcombine -S | grep {load volatile}
+; RUN: opt < %s -instcombine -S | grep "store volatile"
+; RUN: opt < %s -instcombine -S | grep "load volatile"
 
 @x = weak global i32 0		; <i32*> [#uses=2]
 
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index a7bcdac..3722697 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -1,7 +1,7 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep {xor }
+; RUN:    not grep "xor "
 ; END.
 @G1 = global i32 0		; <i32*> [#uses=1]
 @G2 = global i32 0		; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/zeroext-and-reduce.ll b/test/Transforms/InstCombine/zeroext-and-reduce.ll
index 592b8a1..315033d 100644
--- a/test/Transforms/InstCombine/zeroext-and-reduce.ll
+++ b/test/Transforms/InstCombine/zeroext-and-reduce.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {and i32 %Y, 8}
+; RUN:   grep "and i32 %Y, 8"
 
 define i32 @test1(i8 %X) {
         %Y = zext i8 %X to i32          ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index 1164273..78bcedb 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -1,29 +1,16 @@
-; RUN: opt < %s -instcombine -S | not grep zext
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://11748024
 
-define i32 @a(i1 %x) {
+define i32 @a(i1 zeroext %x, i1 zeroext %y) {
 entry:
-        %y = zext i1 %x to i32
-        %res = add i32 %y, 1
-        ret i32 %res
-}
-
-define i32 @b(i1 %x) {
-entry:
-        %y = zext i1 %x to i32
-        %res = add i32 %y, -1
-        ret i32 %res
-}
-
-define i32 @c(i1 %x) {
-entry:
-        %y = zext i1 %x to i32
-        %res = sub i32 0, %y
-        ret i32 %res
-}
-
-define i32 @d(i1 %x) {
-entry:
-        %y = zext i1 %x to i32
-        %res = sub i32 3, %y
-        ret i32 %res
+; CHECK: @a
+; CHECK: [[TMP1:%.*]] = zext i1 %y to i32
+; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
+; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]]
+  %conv = zext i1 %x to i32
+  %conv3 = zext i1 %y to i32
+  %conv3.neg = sub i32 0, %conv3
+  %sub = add i32 %conv, 1
+  %add = add i32 %sub, %conv3.neg
+  ret i32 %add
 }
diff --git a/test/Transforms/InstCombine/zext-fold.ll b/test/Transforms/InstCombine/zext-fold.ll
index 9521101..e5f316b 100644
--- a/test/Transforms/InstCombine/zext-fold.ll
+++ b/test/Transforms/InstCombine/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {zext } | count 1
+; RUN: opt < %s -instcombine -S | grep "zext " | count 1
 ; PR1570
 
 define i32 @test2(float %X, float %Y) {
diff --git a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
index b5d1065..6a50d4f 100644
--- a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
+++ b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -jump-threading -S | grep {ret i32 0}
+; RUN: opt < %s -jump-threading -S | grep "ret i32 0"
 ; PR3138
 
 define i32 @jt() {
diff --git a/test/Transforms/JumpThreading/2012-07-19-NoSuccessorIndirectBr.ll b/test/Transforms/JumpThreading/2012-07-19-NoSuccessorIndirectBr.ll
new file mode 100644
index 0000000..1c2c0c7
--- /dev/null
+++ b/test/Transforms/JumpThreading/2012-07-19-NoSuccessorIndirectBr.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -jump-threading
+; PR 13405
+; Just check that it doesn't crash / assert
+
+define i32 @f() nounwind {
+entry:
+  indirectbr i8* undef, []
+}
diff --git a/test/Transforms/JumpThreading/compare.ll b/test/Transforms/JumpThreading/compare.ll
index 581785c..9b05b44 100644
--- a/test/Transforms/JumpThreading/compare.ll
+++ b/test/Transforms/JumpThreading/compare.ll
@@ -1,5 +1,5 @@
 ; There should be no phi nodes left.
-; RUN: opt < %s -jump-threading  -S | not grep {phi i32}
+; RUN: opt < %s -jump-threading  -S | not grep "phi i32"
 
 declare i32 @f1()
 declare i32 @f2()
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index a4914f9..c6e9faa 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg -S -verify-dom-info -verify-loop-info > %t
-; RUN: grep {store volatile} %t | count 3
-; RUN: not grep {br label} %t
+; RUN: grep "store volatile" %t | count 3
+; RUN: not grep "br label" %t
 
 ; Jump threading should not prevent this loop from being unrolled.
 
diff --git a/test/Transforms/JumpThreading/phi-eq.ll b/test/Transforms/JumpThreading/phi-eq.ll
new file mode 100644
index 0000000..40d3c7e
--- /dev/null
+++ b/test/Transforms/JumpThreading/phi-eq.ll
@@ -0,0 +1,209 @@
+; RUN: llvm-as < %s | opt -jump-threading | llvm-dis | FileCheck %s
+; Test whether two consecutive switches with identical structures assign the
+; proper value to the proper variable.  This is really testing 
+; Instruction::isIdenticalToWhenDefined, as previously that function was 
+; returning true if the value part of the operands of two phis were identical, 
+; even if the incoming blocks were not.
+; NB: this function should be pruned down more.
+
+%struct._GList = type { i8*, %struct._GList*, %struct._GList* }
+%struct.filter_def = type { i8*, i8* }
+
+@capture_filters = external hidden global %struct._GList*, align 8
+@display_filters = external hidden global %struct._GList*, align 8
+@.str2 = external hidden unnamed_addr constant [10 x i8], align 1
+@__PRETTY_FUNCTION__.copy_filter_list = external hidden unnamed_addr constant [62 x i8], align 1
+@.str12 = external hidden unnamed_addr constant [22 x i8], align 1
+@.str13 = external hidden unnamed_addr constant [31 x i8], align 1
+@capture_edited_filters = external hidden global %struct._GList*, align 8
+@display_edited_filters = external hidden global %struct._GList*, align 8
+@__PRETTY_FUNCTION__.get_filter_list = external hidden unnamed_addr constant [44 x i8], align 1
+
+declare void @g_assertion_message(i8*, i8*, i32, i8*, i8*) noreturn
+
+declare void @g_free(i8*)
+
+declare %struct._GList* @g_list_first(%struct._GList*)
+
+declare noalias i8* @g_malloc(i64)
+
+define void @copy_filter_list(i32 %dest_type, i32 %src_type) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %entry
+  %cmp = icmp ne i32 %dest_type, %src_type
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %do.body
+  br label %if.end
+
+if.else:                                          ; preds = %do.body
+  call void @g_assertion_message_expr(i8* null, i8* getelementptr inbounds ([10 x i8]* @.str2, i32 0, i32 0), i32 581, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__.copy_filter_list, i32 0, i32 0), i8* getelementptr inbounds ([22 x i8]* @.str12, i32 0, i32 0)) noreturn
+  unreachable
+
+if.end:                                           ; preds = %if.then
+  br label %do.end
+
+do.end:                                           ; preds = %if.end
+  switch i32 %dest_type, label %sw.default.i [
+    i32 0, label %sw.bb.i
+    i32 1, label %sw.bb1.i
+    i32 2, label %sw.bb2.i
+    i32 3, label %sw.bb3.i
+  ]
+
+sw.bb.i:                                          ; preds = %do.end
+  br label %get_filter_list.exit
+
+sw.bb1.i:                                         ; preds = %do.end
+  br label %get_filter_list.exit
+
+sw.bb2.i:                                         ; preds = %do.end
+  br label %get_filter_list.exit
+
+sw.bb3.i:                                         ; preds = %do.end
+  br label %get_filter_list.exit
+
+sw.default.i:                                     ; preds = %do.end
+  call void @g_assertion_message(i8* null, i8* getelementptr inbounds ([10 x i8]* @.str2, i32 0, i32 0), i32 408, i8* getelementptr inbounds ([44 x i8]* @__PRETTY_FUNCTION__.get_filter_list, i32 0, i32 0), i8* null) noreturn nounwind
+  unreachable
+
+get_filter_list.exit:                             ; preds = %sw.bb3.i, %sw.bb2.i, %sw.bb1.i, %sw.bb.i
+  %0 = phi %struct._GList** [ @display_edited_filters, %sw.bb3.i ], [ @capture_edited_filters, %sw.bb2.i ], [ @display_filters, %sw.bb1.i ], [ @capture_filters, %sw.bb.i ]
+  switch i32 %src_type, label %sw.default.i5 [
+    i32 0, label %sw.bb.i1
+    i32 1, label %sw.bb1.i2
+    i32 2, label %sw.bb2.i3
+    i32 3, label %sw.bb3.i4
+  ]
+
+sw.bb.i1:                                         ; preds = %get_filter_list.exit
+  br label %get_filter_list.exit6
+
+sw.bb1.i2:                                        ; preds = %get_filter_list.exit
+  br label %get_filter_list.exit6
+
+sw.bb2.i3:                                        ; preds = %get_filter_list.exit
+  br label %get_filter_list.exit6
+
+sw.bb3.i4:                                        ; preds = %get_filter_list.exit
+  br label %get_filter_list.exit6
+
+sw.default.i5:                                    ; preds = %get_filter_list.exit
+  call void @g_assertion_message(i8* null, i8* getelementptr inbounds ([10 x i8]* @.str2, i32 0, i32 0), i32 408, i8* getelementptr inbounds ([44 x i8]* @__PRETTY_FUNCTION__.get_filter_list, i32 0, i32 0), i8* null) noreturn nounwind
+  unreachable
+
+; CHECK: get_filter_list.exit
+get_filter_list.exit6:                            ; preds = %sw.bb3.i4, %sw.bb2.i3, %sw.bb1.i2, %sw.bb.i1
+  %1 = phi %struct._GList** [ @display_edited_filters, %sw.bb3.i4 ], [ @capture_edited_filters, %sw.bb2.i3 ], [ @display_filters, %sw.bb1.i2 ], [ @capture_filters, %sw.bb.i1 ]
+; CHECK: %2 = load
+  %2 = load %struct._GList** %1, align 8
+; We should have jump-threading insert an additional load here for the value
+; coming out of the first switch, which is picked up by a subsequent phi
+; CHECK: {{%\.pr = load %[^%]* %0}}
+; CHECK-NEXT:  br label %while.cond
+  br label %while.cond
+
+; CHECK: while.cond
+while.cond:                                       ; preds = %while.body, %get_filter_list.exit6
+; CHECK: {{= phi .*%.pr}}
+  %3 = load %struct._GList** %0, align 8
+; CHECK: tobool
+  %tobool = icmp ne %struct._GList* %3, null
+  br i1 %tobool, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %4 = load %struct._GList** %0, align 8
+  %5 = load %struct._GList** %0, align 8
+  %call2 = call %struct._GList* @g_list_first(%struct._GList* %5)
+  %data.i = getelementptr inbounds %struct._GList* %call2, i32 0, i32 0
+  %6 = load i8** %data.i, align 8
+  %7 = bitcast i8* %6 to %struct.filter_def*
+  %name.i = getelementptr inbounds %struct.filter_def* %7, i32 0, i32 0
+  %8 = load i8** %name.i, align 8
+  call void @g_free(i8* %8) nounwind
+  %strval.i = getelementptr inbounds %struct.filter_def* %7, i32 0, i32 1
+  %9 = load i8** %strval.i, align 8
+  call void @g_free(i8* %9) nounwind
+  %10 = bitcast %struct.filter_def* %7 to i8*
+  call void @g_free(i8* %10) nounwind
+  %call.i = call %struct._GList* @g_list_remove_link(%struct._GList* %4, %struct._GList* %call2) nounwind
+  store %struct._GList* %call.i, %struct._GList** %0, align 8
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  br label %do.body4
+
+do.body4:                                         ; preds = %while.end
+  %11 = load %struct._GList** %0, align 8
+  %call5 = call i32 @g_list_length(%struct._GList* %11)
+  %cmp6 = icmp eq i32 %call5, 0
+  br i1 %cmp6, label %if.then7, label %if.else8
+
+if.then7:                                         ; preds = %do.body4
+  br label %if.end9
+
+if.else8:                                         ; preds = %do.body4
+  call void @g_assertion_message_expr(i8* null, i8* getelementptr inbounds ([10 x i8]* @.str2, i32 0, i32 0), i32 600, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__.copy_filter_list, i32 0, i32 0), i8* getelementptr inbounds ([31 x i8]* @.str13, i32 0, i32 0)) noreturn
+  unreachable
+
+if.end9:                                          ; preds = %if.then7
+  br label %do.end10
+
+do.end10:                                         ; preds = %if.end9
+  br label %while.cond11
+
+while.cond11:                                     ; preds = %cond.end, %do.end10
+  %cond10 = phi %struct._GList* [ %cond, %cond.end ], [ %2, %do.end10 ]
+  %tobool12 = icmp ne %struct._GList* %cond10, null
+  br i1 %tobool12, label %while.body13, label %while.end16
+
+while.body13:                                     ; preds = %while.cond11
+  %data = getelementptr inbounds %struct._GList* %cond10, i32 0, i32 0
+  %12 = load i8** %data, align 8
+  %13 = bitcast i8* %12 to %struct.filter_def*
+  %14 = load %struct._GList** %0, align 8
+  %name = getelementptr inbounds %struct.filter_def* %13, i32 0, i32 0
+  %15 = load i8** %name, align 8
+  %strval = getelementptr inbounds %struct.filter_def* %13, i32 0, i32 1
+  %16 = load i8** %strval, align 8
+  %call.i7 = call noalias i8* @g_malloc(i64 16) nounwind
+  %17 = bitcast i8* %call.i7 to %struct.filter_def*
+  %call1.i = call noalias i8* @g_strdup(i8* %15) nounwind
+  %name.i8 = getelementptr inbounds %struct.filter_def* %17, i32 0, i32 0
+  store i8* %call1.i, i8** %name.i8, align 8
+  %call2.i = call noalias i8* @g_strdup(i8* %16) nounwind
+  %strval.i9 = getelementptr inbounds %struct.filter_def* %17, i32 0, i32 1
+  store i8* %call2.i, i8** %strval.i9, align 8
+  %18 = bitcast %struct.filter_def* %17 to i8*
+  %call3.i = call %struct._GList* @g_list_append(%struct._GList* %14, i8* %18) nounwind
+  store %struct._GList* %call3.i, %struct._GList** %0, align 8
+  %tobool15 = icmp ne %struct._GList* %cond10, null
+  br i1 %tobool15, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %while.body13
+  %next = getelementptr inbounds %struct._GList* %cond10, i32 0, i32 1
+  %19 = load %struct._GList** %next, align 8
+  br label %cond.end
+
+cond.false:                                       ; preds = %while.body13
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi %struct._GList* [ %19, %cond.true ], [ null, %cond.false ]
+  br label %while.cond11
+
+while.end16:                                      ; preds = %while.cond11
+  ret void
+}
+
+declare void @g_assertion_message_expr(i8*, i8*, i32, i8*, i8*) noreturn
+
+declare i32 @g_list_length(%struct._GList*)
+
+declare noalias i8* @g_strdup(i8*)
+
+declare %struct._GList* @g_list_append(%struct._GList*, i8*)
+
+declare %struct._GList* @g_list_remove_link(%struct._GList*, %struct._GList*)
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index 1534585..dd43c88 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-simplify -lcssa -S | \
-; RUN:   grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
+; RUN:   grep "%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry"
 
         %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
 
diff --git a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
index ad4f144..575f816 100644
--- a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
+++ b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -lcssa -S | \
-; RUN:    grep {%X.1.lcssa}
+; RUN:    grep "%X.1.lcssa"
 ; RUN: opt < %s -lcssa -S | \
-; RUN:    not grep {%X.1.lcssa1}
+; RUN:    not grep "%X.1.lcssa1"
 
 declare i1 @c1()
 
diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll
index 23ab2c0..4b05ad9 100644
--- a/test/Transforms/LCSSA/basictest.ll
+++ b/test/Transforms/LCSSA/basictest.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -lcssa -S | \
-; RUN:   grep {X3.lcssa = phi i32}
+; RUN:   grep "X3.lcssa = phi i32"
 ; RUN: opt < %s -lcssa -S | \
-; RUN:   grep {X4 = add i32 3, %X3.lcssa}
+; RUN:   grep "X4 = add i32 3, %X3.lcssa"
 
 define void @lcssa(i1 %S2) {
 entry:
diff --git a/test/Transforms/LCSSA/unreachable-use.ll b/test/Transforms/LCSSA/unreachable-use.ll
index c389c9c..71ae134 100644
--- a/test/Transforms/LCSSA/unreachable-use.ll
+++ b/test/Transforms/LCSSA/unreachable-use.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -lcssa -S -verify-loop-info | grep {\[%\]tmp33 = load i1\\*\\* \[%\]tmp}
+; RUN: opt < %s -lcssa -S -verify-loop-info | grep "[%]tmp33 = load i1\*\* [%]tmp"
 ; PR6546
 
 ; LCSSA doesn't need to transform uses in blocks not reachable
diff --git a/test/Transforms/LCSSA/unused-phis.ll b/test/Transforms/LCSSA/unused-phis.ll
index aa2ab96..01b214b 100644
--- a/test/Transforms/LCSSA/unused-phis.ll
+++ b/test/Transforms/LCSSA/unused-phis.ll
@@ -2,9 +2,9 @@
 ; CHECK: exit1:
 ; CHECK: .lcssa =
 ; CHECK: exit2:
-; CHECK: .lcssa2 =
+; CHECK: .lcssa1 =
 ; CHECK: exit3:
-; CHECK-NOT: .lcssa1 =
+; CHECK-NOT: .lcssa
 
 ; Test to ensure that when there are multiple exit blocks, PHI nodes are
 ; only inserted by LCSSA when there is a use dominated by a given exit
diff --git a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
index 70a04c7..b54d520 100644
--- a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
+++ b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
@@ -4,7 +4,7 @@
 ; case... bad.
 
 ; RUN: opt < %s -licm -loop-deletion -simplifycfg -S | \
-; RUN:   not grep {br }
+; RUN:   not grep "br "
 
 define i32 @main(i32 %argc) {
 ; <label>:0
diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index 4df6ea7..94511cc 100644
--- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -S | grep {store volatile}
+; RUN: opt < %s -licm -S | grep "store volatile"
 ; PR1435
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index 4e100d3..f9fc551 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -stats -S |& grep "1 licm"
+; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm"
 
 @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
 @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll
new file mode 100644
index 0000000..b016265
--- /dev/null
+++ b/test/Transforms/LICM/promote-order.ll
@@ -0,0 +1,41 @@
+; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
+
+; LICM should keep the stores in their original order when it sinks/promotes them.
+; rdar://12045203
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@p = external global i8*
+
+define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind {
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  store float 1.000000e+00, float* %tmp1, align 4, !tbaa !1
+  store i32 1, i32* %tmp3, align 4, !tbaa !2
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+; CHECK: for.cond.for.end_crit_edge:
+; CHECK: store float 1.000000e+00, float* %tmp1
+; CHECK: store i32 1, i32* %tmp3
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %split = phi i32* [ %tmp3, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %r.0.lcssa = phi i32* [ %split, %for.cond.for.end_crit_edge ], [ undef, %entry ]
+  ret i32* %r.0.lcssa
+}
+
+!0 = metadata !{metadata !"minimal TBAA"}
+!1 = metadata !{metadata !"float", metadata !0}
+!2 = metadata !{metadata !"int", metadata !0}
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 507b193..4c4d036 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -165,3 +165,25 @@ for.inc:                                          ; preds = %if.then, %for.body
 for.end:                                          ; preds = %for.inc, %entry
   ret void
 }
+
+; SDiv is unsafe to speculate inside an infinite loop.
+
+define void @unsafe_sdiv_c(i64 %a, i64 %b, i64* %p) {
+entry:
+; CHECK: entry:
+; CHECK-NOT: sdiv
+; CHECK: br label %for.body
+  br label %for.body
+
+for.body:
+  %c = icmp eq i64 %b, 0
+  br i1 %c, label %backedge, label %if.then
+
+if.then:
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %p
+  br label %backedge
+
+backedge:
+  br label %for.body
+}
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index 9cb55b4..a224777 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | not grep {\\\[ .tmp224} 
-; END.
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | FileCheck %s
+; CHECK-NOT: [ {{.}}tmp224
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/Transforms/LoopSimplify/indirectbr.ll b/test/Transforms/LoopSimplify/indirectbr.ll
index 9814d4a..ca05f43 100644
--- a/test/Transforms/LoopSimplify/indirectbr.ll
+++ b/test/Transforms/LoopSimplify/indirectbr.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S \
-; RUN:   | grep -F {indirectbr i8* %x, \[label %L0, label %L1\]} \
+; RUN:   | grep -F "indirectbr i8* %x, [label %L0, label %L1]" \
 ; RUN:   | count 6
 
 ; LoopSimplify should not try to transform loops when indirectbr is involved.
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 40ad2f4..8de5938 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
 ; RUN: not grep sext %t
-; RUN: grep {phi i64} %t | count 1
+; RUN: grep "phi i64" %t | count 1
 
 ; Loopsimplify should be able to merge the two loop exits
 ; into one, so that loop rotate can rotate the loop, so
diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll
index 23ac7f2..854c612 100644
--- a/test/Transforms/LoopSimplify/preserve-scev.ll
+++ b/test/Transforms/LoopSimplify/preserve-scev.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>}
+; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep "%cmp = icmp slt i32" | grep "= {%\.ph,+,1}<%for.cond>"
 ; PR8079
 
 ; LoopSimplify should invalidate indvars when splitting out the
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
new file mode 100644
index 0000000..a122208
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -0,0 +1,90 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11356: likely wrong code bug
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+@g_66 = global [1 x i32] zeroinitializer, align 4
+@g_775 = global i32 0, align 4
+@g_752 = global i32 0, align 4
+@g_3 = global i32 0, align 4
+
+; Ensure that %div.i.i.us is not hoisted.
+; CHECK: @main
+; CHECK: for.body.i.i.us:
+; CHECK: %div.i.i.i.us
+; CHECK: %cmp5.i.i.us
+define i32 @main() nounwind uwtable ssp {
+entry:
+  %l_2 = alloca [1 x i32], align 4
+  %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
+  store i32 0, i32* %arrayidx, align 4, !tbaa !0
+  %tmp = load i32* @g_3, align 4, !tbaa !0
+  %idxprom = sext i32 %tmp to i64
+  %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
+  %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
+  %conv.i.i = and i32 %tmp1, 65535
+  %tobool.i.i.i = icmp ne i32 %tmp, 0
+  br label %codeRepl
+
+codeRepl.loopexit.us-lcssa:                       ; preds = %for.body.i.i, %codeRepl5
+  br label %codeRepl.loopexit
+
+codeRepl.loopexit:                                ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa
+  br label %codeRepl
+
+codeRepl:                                         ; preds = %codeRepl.loopexit, %entry
+  br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge
+
+codeRepl.codeRepl.split_crit_edge:                ; preds = %codeRepl
+  br label %codeRepl.split
+
+codeRepl.split.us:                                ; preds = %codeRepl
+  br label %for.cond.i.i.us
+
+for.cond.i.i.us:                                  ; preds = %for.inc.i.i.us, %codeRepl.split.us
+  %tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ]
+  br label %codeRepl5.us
+
+for.inc.i.i.us:                                   ; preds = %for.body.i.i.us
+  %add.i.i.us = add nsw i32 %tmp2, 1
+  store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
+  br label %for.cond.i.i.us
+
+for.body.i.i.us:                                  ; preds = %codeRepl5.us
+  %div.i.i.i.us = udiv i32 1, %conv.i.i
+  %cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2
+  br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us
+
+codeRepl5.us:                                     ; preds = %for.cond.i.i.us
+  br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us
+
+codeRepl.loopexit.us-lcssa.us:                    ; preds = %codeRepl5.us, %for.body.i.i.us
+  br label %codeRepl.loopexit
+
+codeRepl.split:                                   ; preds = %codeRepl.codeRepl.split_crit_edge
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %for.inc.i.i, %codeRepl.split
+  %tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ]
+  br label %codeRepl5
+
+codeRepl5:                                        ; preds = %for.cond.i.i
+  br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %codeRepl5
+  %cmp5.i.i = icmp eq i32 0, %tmp3
+  br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i
+
+for.inc.i.i:                                      ; preds = %for.body.i.i
+  %add.i.i = add nsw i32 %tmp3, 1
+  store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
+  br label %for.cond.i.i
+
+func_4.exit:                                      ; No predecessors!
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
new file mode 100644
index 0000000..3793bac
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -0,0 +1,517 @@
+; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce %s 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+;
+; PR13361: LSR + SCEV "hangs" on reasonably sized test with sequence of loops
+;
+; Without limits on CollectSubexpr, we have thousands of formulae for
+; the use that crosses loops. With limits we have five.
+; CHECK: LSR on loop %bb221:
+; CHECK: After generating reuse formulae:
+; CHECK: LSR is examining the following uses:
+; CHECK: LSR Use: Kind=Special
+; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK-NOT:reg
+; CHECK: Filtering for use
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-freebsd9"
+
+%struct.snork = type { %struct.fuga, i32, i32, i32, i32, i32, i32 }
+%struct.fuga = type { %struct.gork, i64 }
+%struct.gork = type { i8*, i32, i32, %struct.noot* }
+%struct.noot = type opaque
+%struct.jim = type { [5120 x i8], i32, i32, [2048 x i8], i32, [256 x i8] }
+
+@global = external global %struct.snork, align 8
+@global1 = external hidden unnamed_addr constant [52 x i8], align 1
+@global2 = external hidden unnamed_addr constant [18 x i8], align 1
+@global3 = external hidden global %struct.jim, align 32
+@global4 = external hidden unnamed_addr constant [40 x i8], align 1
+
+declare void @snork(...) nounwind
+
+declare fastcc void @blarg() nounwind uwtable readonly
+
+define hidden fastcc void @boogle() nounwind uwtable {
+bb:
+  %tmp = trunc i64 0 to i32
+  %tmp1 = icmp slt i32 %tmp, 2047
+  %tmp2 = add i32 0, -1
+  %tmp3 = icmp ult i32 %tmp2, 255
+  %tmp4 = and i1 %tmp1, %tmp3
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8]* @global1, i64 0, i64 0), i32 2021) nounwind
+  tail call void (...)* @snork(i8* getelementptr inbounds (%struct.jim* @global3, i64 0, i32 3, i64 1), i32 -2146631418) nounwind
+  unreachable
+
+bb6:                                              ; preds = %bb
+  tail call void @zot(i8* getelementptr inbounds (%struct.jim* @global3, i64 0, i32 5, i64 0), i8* getelementptr inbounds (%struct.jim* @global3, i64 0, i32 3, i64 1), i64 undef, i32 1, i1 false) nounwind
+  %tmp7 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 5, i64 undef
+  store i8 0, i8* %tmp7, align 1
+  %tmp8 = add nsw i32 0, 1
+  %tmp9 = sext i32 %tmp8 to i64
+  %tmp10 = add i64 %tmp9, 1
+  %tmp11 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 3, i64 %tmp10
+  %tmp12 = sub i64 2047, %tmp9
+  %tmp13 = icmp eq i32 undef, 1
+  br i1 %tmp13, label %bb14, label %bb15
+
+bb14:                                             ; preds = %bb6
+  tail call fastcc void @blarg()
+  unreachable
+
+bb15:                                             ; preds = %bb6
+  %tmp16 = trunc i64 %tmp12 to i32
+  br label %bb17
+
+bb17:                                             ; preds = %bb26, %bb15
+  %tmp18 = phi i64 [ %tmp28, %bb26 ], [ 0, %bb15 ]
+  %tmp19 = phi i32 [ %tmp29, %bb26 ], [ 0, %bb15 ]
+  %tmp20 = trunc i64 %tmp18 to i32
+  %tmp21 = icmp slt i32 %tmp20, %tmp16
+  br i1 %tmp21, label %bb22, label %bb32
+
+bb22:                                             ; preds = %bb17
+  %tmp23 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 3, i64 0
+  %tmp24 = load i8* %tmp23, align 1
+  %tmp25 = icmp eq i8 %tmp24, 58
+  br i1 %tmp25, label %bb30, label %bb26
+
+bb26:                                             ; preds = %bb22
+  %tmp27 = icmp eq i8 %tmp24, 0
+  %tmp28 = add i64 %tmp18, 1
+  %tmp29 = add nsw i32 %tmp19, 1
+  br i1 %tmp27, label %bb32, label %bb17
+
+bb30:                                             ; preds = %bb22
+  %tmp31 = icmp ult i32 undef, 255
+  br i1 %tmp31, label %bb33, label %bb32
+
+bb32:                                             ; preds = %bb30, %bb26, %bb17
+  tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8]* @global1, i64 0, i64 0), i32 2038) nounwind
+  tail call void (...)* @snork(i8* %tmp11, i32 -2146631418) nounwind
+  unreachable
+
+bb33:                                             ; preds = %bb30
+  tail call void @zot(i8* getelementptr inbounds (%struct.jim* @global3, i64 0, i32 5, i64 0), i8* %tmp11, i64 undef, i32 1, i1 false) nounwind
+  %tmp34 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 5, i64 undef
+  store i8 0, i8* %tmp34, align 1
+  %tmp35 = add nsw i32 %tmp19, 1
+  %tmp36 = sext i32 %tmp35 to i64
+  %tmp37 = add i64 %tmp36, %tmp10
+  %tmp38 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 3, i64 %tmp37
+  %tmp39 = sub i64 %tmp12, %tmp36
+  br i1 false, label %bb40, label %bb41
+
+bb40:                                             ; preds = %bb33
+  br label %bb41
+
+bb41:                                             ; preds = %bb40, %bb33
+  %tmp42 = trunc i64 %tmp39 to i32
+  br label %bb43
+
+bb43:                                             ; preds = %bb52, %bb41
+  %tmp44 = phi i64 [ %tmp53, %bb52 ], [ 0, %bb41 ]
+  %tmp45 = phi i32 [ %tmp54, %bb52 ], [ 0, %bb41 ]
+  %tmp46 = trunc i64 %tmp44 to i32
+  %tmp47 = icmp slt i32 %tmp46, %tmp42
+  br i1 %tmp47, label %bb48, label %bb58
+
+bb48:                                             ; preds = %bb43
+  %tmp49 = add i64 %tmp44, %tmp37
+  %tmp50 = load i8* undef, align 1
+  %tmp51 = icmp eq i8 %tmp50, 58
+  br i1 %tmp51, label %bb55, label %bb52
+
+bb52:                                             ; preds = %bb48
+  %tmp53 = add i64 %tmp44, 1
+  %tmp54 = add nsw i32 %tmp45, 1
+  br i1 undef, label %bb58, label %bb43
+
+bb55:                                             ; preds = %bb48
+  %tmp56 = add i32 %tmp45, -1
+  %tmp57 = icmp ult i32 %tmp56, 255
+  br i1 %tmp57, label %bb59, label %bb58
+
+bb58:                                             ; preds = %bb55, %bb52, %bb43
+  tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8]* @global1, i64 0, i64 0), i32 2055) nounwind
+  tail call void (...)* @snork(i8* %tmp38, i32 -2146631418) nounwind
+  br label %bb247
+
+bb59:                                             ; preds = %bb55
+  %tmp60 = sext i32 %tmp45 to i64
+  tail call void @zot(i8* getelementptr inbounds (%struct.jim* @global3, i64 0, i32 5, i64 0), i8* %tmp38, i64 %tmp60, i32 1, i1 false) nounwind
+  %tmp61 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 5, i64 %tmp60
+  store i8 0, i8* %tmp61, align 1
+  %tmp62 = add nsw i32 %tmp45, 1
+  %tmp63 = sext i32 %tmp62 to i64
+  %tmp64 = add i64 %tmp63, %tmp37
+  %tmp65 = sub i64 %tmp39, %tmp63
+  %tmp66 = icmp eq i32 undef, 2
+  br i1 %tmp66, label %bb67, label %bb68
+
+bb67:                                             ; preds = %bb59
+  tail call fastcc void @blarg()
+  unreachable
+
+bb68:                                             ; preds = %bb59
+  switch i32 undef, label %bb71 [
+    i32 0, label %bb74
+    i32 -1, label %bb69
+  ]
+
+bb69:                                             ; preds = %bb68
+  tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8]* @global1, i64 0, i64 0), i32 2071) nounwind
+  %tmp70 = load i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 2), align 4
+  unreachable
+
+bb71:                                             ; preds = %bb68
+  %tmp72 = load i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 4), align 4
+  %tmp73 = icmp eq i32 undef, 0
+  br i1 %tmp73, label %bb247, label %bb74
+
+bb74:                                             ; preds = %bb71, %bb68
+  %tmp75 = trunc i64 %tmp65 to i32
+  br label %bb76
+
+bb76:                                             ; preds = %bb82, %bb74
+  %tmp77 = phi i64 [ %tmp84, %bb82 ], [ 0, %bb74 ]
+  %tmp78 = phi i32 [ %tmp85, %bb82 ], [ 0, %bb74 ]
+  %tmp79 = trunc i64 %tmp77 to i32
+  %tmp80 = icmp slt i32 %tmp79, %tmp75
+  br i1 %tmp80, label %bb81, label %bb87
+
+bb81:                                             ; preds = %bb76
+  br i1 false, label %bb86, label %bb82
+
+bb82:                                             ; preds = %bb81
+  %tmp83 = icmp eq i8 0, 0
+  %tmp84 = add i64 %tmp77, 1
+  %tmp85 = add nsw i32 %tmp78, 1
+  br i1 %tmp83, label %bb87, label %bb76
+
+bb86:                                             ; preds = %bb81
+  br i1 undef, label %bb88, label %bb87
+
+bb87:                                             ; preds = %bb86, %bb82, %bb76
+  unreachable
+
+bb88:                                             ; preds = %bb86
+  %tmp89 = add nsw i32 %tmp78, 1
+  %tmp90 = sext i32 %tmp89 to i64
+  %tmp91 = add i64 %tmp90, %tmp64
+  %tmp92 = sub i64 %tmp65, %tmp90
+  br i1 false, label %bb93, label %bb94
+
+bb93:                                             ; preds = %bb88
+  unreachable
+
+bb94:                                             ; preds = %bb88
+  %tmp95 = trunc i64 %tmp92 to i32
+  br label %bb96
+
+bb96:                                             ; preds = %bb102, %bb94
+  %tmp97 = phi i64 [ %tmp103, %bb102 ], [ 0, %bb94 ]
+  %tmp98 = phi i32 [ %tmp104, %bb102 ], [ 0, %bb94 ]
+  %tmp99 = trunc i64 %tmp97 to i32
+  %tmp100 = icmp slt i32 %tmp99, %tmp95
+  br i1 %tmp100, label %bb101, label %bb106
+
+bb101:                                            ; preds = %bb96
+  br i1 undef, label %bb105, label %bb102
+
+bb102:                                            ; preds = %bb101
+  %tmp103 = add i64 %tmp97, 1
+  %tmp104 = add nsw i32 %tmp98, 1
+  br i1 false, label %bb106, label %bb96
+
+bb105:                                            ; preds = %bb101
+  br i1 undef, label %bb107, label %bb106
+
+bb106:                                            ; preds = %bb105, %bb102, %bb96
+  br label %bb247
+
+bb107:                                            ; preds = %bb105
+  %tmp108 = add nsw i32 %tmp98, 1
+  %tmp109 = sext i32 %tmp108 to i64
+  %tmp110 = add i64 %tmp109, %tmp91
+  %tmp111 = sub i64 %tmp92, %tmp109
+  br i1 false, label %bb112, label %bb113
+
+bb112:                                            ; preds = %bb107
+  unreachable
+
+bb113:                                            ; preds = %bb107
+  %tmp114 = trunc i64 %tmp111 to i32
+  br label %bb115
+
+bb115:                                            ; preds = %bb121, %bb113
+  %tmp116 = phi i64 [ %tmp122, %bb121 ], [ 0, %bb113 ]
+  %tmp117 = phi i32 [ %tmp123, %bb121 ], [ 0, %bb113 ]
+  %tmp118 = trunc i64 %tmp116 to i32
+  %tmp119 = icmp slt i32 %tmp118, %tmp114
+  br i1 %tmp119, label %bb120, label %bb125
+
+bb120:                                            ; preds = %bb115
+  br i1 undef, label %bb124, label %bb121
+
+bb121:                                            ; preds = %bb120
+  %tmp122 = add i64 %tmp116, 1
+  %tmp123 = add nsw i32 %tmp117, 1
+  br i1 false, label %bb125, label %bb115
+
+bb124:                                            ; preds = %bb120
+  br i1 false, label %bb126, label %bb125
+
+bb125:                                            ; preds = %bb124, %bb121, %bb115
+  unreachable
+
+bb126:                                            ; preds = %bb124
+  %tmp127 = add nsw i32 %tmp117, 1
+  %tmp128 = sext i32 %tmp127 to i64
+  %tmp129 = add i64 %tmp128, %tmp110
+  %tmp130 = sub i64 %tmp111, %tmp128
+  tail call fastcc void @blarg()
+  br i1 false, label %bb132, label %bb131
+
+bb131:                                            ; preds = %bb126
+  unreachable
+
+bb132:                                            ; preds = %bb126
+  %tmp133 = trunc i64 %tmp130 to i32
+  br label %bb134
+
+bb134:                                            ; preds = %bb140, %bb132
+  %tmp135 = phi i64 [ %tmp141, %bb140 ], [ 0, %bb132 ]
+  %tmp136 = phi i32 [ %tmp142, %bb140 ], [ 0, %bb132 ]
+  %tmp137 = trunc i64 %tmp135 to i32
+  %tmp138 = icmp slt i32 %tmp137, %tmp133
+  br i1 %tmp138, label %bb139, label %bb144
+
+bb139:                                            ; preds = %bb134
+  br i1 false, label %bb143, label %bb140
+
+bb140:                                            ; preds = %bb139
+  %tmp141 = add i64 %tmp135, 1
+  %tmp142 = add nsw i32 %tmp136, 1
+  br i1 false, label %bb144, label %bb134
+
+bb143:                                            ; preds = %bb139
+  br i1 false, label %bb145, label %bb144
+
+bb144:                                            ; preds = %bb143, %bb140, %bb134
+  br label %bb247
+
+bb145:                                            ; preds = %bb143
+  %tmp146 = add nsw i32 %tmp136, 1
+  %tmp147 = sext i32 %tmp146 to i64
+  %tmp148 = add i64 %tmp147, %tmp129
+  %tmp149 = sub i64 %tmp130, %tmp147
+  switch i32 0, label %bb152 [
+    i32 0, label %bb150
+    i32 16, label %bb150
+    i32 32, label %bb150
+    i32 48, label %bb150
+    i32 64, label %bb150
+    i32 256, label %bb150
+    i32 4096, label %bb150
+  ]
+
+bb150:                                            ; preds = %bb145, %bb145, %bb145, %bb145, %bb145, %bb145, %bb145
+  %tmp151 = trunc i64 %tmp149 to i32
+  br label %bb153
+
+bb152:                                            ; preds = %bb145
+  unreachable
+
+bb153:                                            ; preds = %bb160, %bb150
+  %tmp154 = phi i64 [ %tmp161, %bb160 ], [ 0, %bb150 ]
+  %tmp155 = phi i32 [ %tmp162, %bb160 ], [ 0, %bb150 ]
+  %tmp156 = trunc i64 %tmp154 to i32
+  %tmp157 = icmp slt i32 %tmp156, %tmp151
+  br i1 %tmp157, label %bb158, label %bb166
+
+bb158:                                            ; preds = %bb153
+  %tmp159 = add i64 %tmp154, %tmp148
+  br i1 false, label %bb163, label %bb160
+
+bb160:                                            ; preds = %bb158
+  %tmp161 = add i64 %tmp154, 1
+  %tmp162 = add nsw i32 %tmp155, 1
+  br i1 false, label %bb166, label %bb153
+
+bb163:                                            ; preds = %bb158
+  %tmp164 = add i32 %tmp155, -1
+  %tmp165 = icmp ult i32 %tmp164, 255
+  br i1 %tmp165, label %bb167, label %bb166
+
+bb166:                                            ; preds = %bb163, %bb160, %bb153
+  unreachable
+
+bb167:                                            ; preds = %bb163
+  %tmp168 = add nsw i32 %tmp155, 1
+  %tmp169 = sext i32 %tmp168 to i64
+  %tmp170 = add i64 %tmp169, %tmp148
+  %tmp171 = sub i64 %tmp149, %tmp169
+  br i1 false, label %bb173, label %bb172
+
+bb172:                                            ; preds = %bb167
+  unreachable
+
+bb173:                                            ; preds = %bb167
+  %tmp174 = trunc i64 %tmp171 to i32
+  br label %bb175
+
+bb175:                                            ; preds = %bb181, %bb173
+  %tmp176 = phi i64 [ %tmp183, %bb181 ], [ 0, %bb173 ]
+  %tmp177 = phi i32 [ %tmp184, %bb181 ], [ 0, %bb173 ]
+  %tmp178 = trunc i64 %tmp176 to i32
+  %tmp179 = icmp slt i32 %tmp178, %tmp174
+  br i1 %tmp179, label %bb180, label %bb186
+
+bb180:                                            ; preds = %bb175
+  br i1 false, label %bb185, label %bb181
+
+bb181:                                            ; preds = %bb180
+  %tmp182 = icmp eq i8 0, 0
+  %tmp183 = add i64 %tmp176, 1
+  %tmp184 = add nsw i32 %tmp177, 1
+  br i1 %tmp182, label %bb186, label %bb175
+
+bb185:                                            ; preds = %bb180
+  br i1 false, label %bb187, label %bb186
+
+bb186:                                            ; preds = %bb185, %bb181, %bb175
+  unreachable
+
+bb187:                                            ; preds = %bb185
+  %tmp188 = add nsw i32 %tmp177, 1
+  %tmp189 = sext i32 %tmp188 to i64
+  %tmp190 = sub i64 %tmp171, %tmp189
+  br i1 false, label %bb192, label %bb191
+
+bb191:                                            ; preds = %bb187
+  unreachable
+
+bb192:                                            ; preds = %bb187
+  %tmp193 = trunc i64 %tmp190 to i32
+  br label %bb194
+
+bb194:                                            ; preds = %bb200, %bb192
+  %tmp195 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb192 ]
+  %tmp196 = phi i32 [ %tmp202, %bb200 ], [ 0, %bb192 ]
+  %tmp197 = trunc i64 %tmp195 to i32
+  %tmp198 = icmp slt i32 %tmp197, %tmp193
+  br i1 %tmp198, label %bb199, label %bb204
+
+bb199:                                            ; preds = %bb194
+  br i1 false, label %bb203, label %bb200
+
+bb200:                                            ; preds = %bb199
+  %tmp201 = add i64 %tmp195, 1
+  %tmp202 = add nsw i32 %tmp196, 1
+  br i1 false, label %bb204, label %bb194
+
+bb203:                                            ; preds = %bb199
+  br i1 undef, label %bb205, label %bb204
+
+bb204:                                            ; preds = %bb203, %bb200, %bb194
+  unreachable
+
+bb205:                                            ; preds = %bb203
+  %tmp206 = add nsw i32 %tmp196, 1
+  %tmp207 = sext i32 %tmp206 to i64
+  %tmp208 = add i64 %tmp207, 0
+  %tmp209 = sub i64 %tmp190, %tmp207
+  br i1 %tmp13, label %bb210, label %bb211
+
+bb210:                                            ; preds = %bb205
+  unreachable
+
+bb211:                                            ; preds = %bb205
+  %tmp212 = trunc i64 %tmp209 to i32
+  %tmp213 = icmp slt i32 0, %tmp212
+  br i1 false, label %bb215, label %bb214
+
+bb214:                                            ; preds = %bb211
+  unreachable
+
+bb215:                                            ; preds = %bb211
+  %tmp216 = add i64 undef, %tmp208
+  %tmp217 = sub i64 %tmp209, undef
+  br i1 false, label %bb218, label %bb219
+
+bb218:                                            ; preds = %bb215
+  br label %bb219
+
+bb219:                                            ; preds = %bb218, %bb215
+  %tmp220 = trunc i64 %tmp217 to i32
+  br label %bb221
+
+bb221:                                            ; preds = %bb230, %bb219
+  %tmp222 = phi i64 [ %tmp231, %bb230 ], [ 0, %bb219 ]
+  %tmp223 = phi i32 [ %tmp232, %bb230 ], [ 0, %bb219 ]
+  %tmp224 = trunc i64 %tmp222 to i32
+  %tmp225 = icmp slt i32 %tmp224, %tmp220
+  br i1 %tmp225, label %bb226, label %bb234
+
+bb226:                                            ; preds = %bb221
+  %tmp227 = add i64 %tmp222, %tmp216
+  %tmp228 = getelementptr inbounds %struct.jim* @global3, i64 0, i32 3, i64 %tmp227
+  %tmp229 = load i8* %tmp228, align 1
+  br i1 false, label %bb233, label %bb230
+
+bb230:                                            ; preds = %bb226
+  %tmp231 = add i64 %tmp222, 1
+  %tmp232 = add nsw i32 %tmp223, 1
+  br i1 undef, label %bb234, label %bb221
+
+bb233:                                            ; preds = %bb226
+  br i1 undef, label %bb235, label %bb234
+
+bb234:                                            ; preds = %bb233, %bb230, %bb221
+  br label %bb247
+
+bb235:                                            ; preds = %bb233
+  %tmp236 = add nsw i32 %tmp223, 1
+  %tmp237 = sext i32 %tmp236 to i64
+  %tmp238 = sub i64 %tmp217, %tmp237
+  br i1 %tmp66, label %bb239, label %bb240
+
+bb239:                                            ; preds = %bb235
+  unreachable
+
+bb240:                                            ; preds = %bb235
+  switch i32 0, label %bb244 [
+    i32 0, label %bb241
+    i32 1, label %bb241
+    i32 4, label %bb241
+    i32 6, label %bb241
+    i32 9, label %bb241
+  ]
+
+bb241:                                            ; preds = %bb240, %bb240, %bb240, %bb240, %bb240
+  %tmp242 = trunc i64 %tmp238 to i32
+  %tmp243 = icmp slt i32 0, %tmp242
+  br i1 false, label %bb246, label %bb245
+
+bb244:                                            ; preds = %bb240
+  unreachable
+
+bb245:                                            ; preds = %bb241
+  unreachable
+
+bb246:                                            ; preds = %bb241
+  unreachable
+
+bb247:                                            ; preds = %bb234, %bb144, %bb106, %bb71, %bb58
+  ret void
+}
+
+declare void @zot(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
new file mode 100644
index 0000000..b5124ea
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a8 | FileCheck %s
+;
+; LSR should only check for valid address modes when the IV user is a
+; memory address.
+; svn r158536, rdar://11635990
+;
+; Note that we still don't produce the best code here because we fail
+; to coalesce the IV. See <rdar://problem/11680670> [coalescer] IVs
+; need to be scheduled to expose coalescing.
+
+; LSR before the fix:
+;The chosen solution requires 4 regs, with addrec cost 1, plus 3 base adds, plus 2 setup cost:
+;  LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
+;    reg(%v3) + reg({0,+,-1}<%while.cond.i.i>) + imm(1)
+;  LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32
+;    reg(%v3) + reg({0,+,-1}<%while.cond.i.i>)
+;  LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
+;    reg((-4 + (4 * %v3) + %v1)) + 4*reg({0,+,-1}<%while.cond.i.i>)
+;  LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
+;    reg((-4 + (4 * %v3) + %v4)) + 4*reg({0,+,-1}<%while.cond.i.i>)
+;  LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
+;    reg(%v3)
+;
+; LSR after the fix:
+;The chosen solution requires 4 regs, with addrec cost 1, plus 1 base add, plus 2 setup cost:
+;  LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
+;    reg({%v3,+,-1}<nsw><%while.cond.i.i>) + imm(1)
+;  LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32
+;    reg({%v3,+,-1}<nsw><%while.cond.i.i>)
+;  LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
+;    reg((-4 + %v1)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>)
+;  LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
+;    reg((-4 + %v4)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>)
+;  LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
+;    reg(%v3)
+
+
+%s = type { i32* }
+
+@ncol = external global i32, align 4
+
+declare i32* @getptr() nounwind
+declare %s* @getstruct() nounwind
+
+; CHECK: @main
+; Check that the loop preheader contains no address computation.
+; CHECK: %end_of_chain
+; CHECK-NOT: add{{.*}}lsl
+; CHECK: ldr{{.*}}lsl #2
+; CHECK: ldr{{.*}}lsl #2
+define i32 @main() nounwind ssp {
+entry:
+  %v0 = load i32* @ncol, align 4, !tbaa !0
+  %v1 = tail call i32* @getptr() nounwind
+  %cmp10.i = icmp eq i32 %v0, 0
+  br label %while.cond.outer
+
+while.cond.outer:
+  %call18 = tail call %s* @getstruct() nounwind
+  br label %while.cond
+
+while.cond:
+  %cmp20 = icmp eq i32* %v1, null
+  br label %while.body
+
+while.body:
+  %v3 = load i32* @ncol, align 4, !tbaa !0
+  br label %end_of_chain
+
+end_of_chain:
+  %state.i = getelementptr inbounds %s* %call18, i32 0, i32 0
+  %v4 = load i32** %state.i, align 4, !tbaa !3
+  br label %while.cond.i.i
+
+while.cond.i.i:
+  %counter.0.i.i = phi i32 [ %v3, %end_of_chain ], [ %dec.i.i, %land.rhs.i.i ]
+  %dec.i.i = add nsw i32 %counter.0.i.i, -1
+  %tobool.i.i = icmp eq i32 %counter.0.i.i, 0
+  br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i
+
+land.rhs.i.i:
+  %arrayidx.i.i = getelementptr inbounds i32* %v4, i32 %dec.i.i
+  %v5 = load i32* %arrayidx.i.i, align 4, !tbaa !0
+  %arrayidx1.i.i = getelementptr inbounds i32* %v1, i32 %dec.i.i
+  %v6 = load i32* %arrayidx1.i.i, align 4, !tbaa !0
+  %cmp.i.i = icmp eq i32 %v5, %v6
+  br i1 %cmp.i.i, label %while.cond.i.i, label %equal_data.exit.i
+
+equal_data.exit.i:
+  ret i32 %counter.0.i.i
+
+where.exit:
+  br label %while.end.i
+
+while.end.i:
+  ret i32 %v3
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index ed32ca8..c3b8b89 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -90,3 +90,59 @@ for.inc498:                                       ; preds = %for.inc498, %for.bo
 while.end:                                        ; preds = %entry
   ret void
 }
+
+; PR12898: SCEVExpander crash
+; Test redundant phi elimination when the deleted phi's increment is
+; itself a phi.
+;
+; CHECK: @test3
+; CHECK: %for.body3.lr.ph.us.i.loopexit
+; CHECK-NEXT: in Loop: Header
+; CHECK-NEXT: incq
+; CHECK-NEXT: %for.body3.us.i
+; CHECK-NEXT: Inner Loop
+; CHECK: testb
+; CHECK: jne
+; CHECK: jmp
+define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
+entry:
+  br i1 undef, label %meshBB1, label %meshBB5
+
+for.inc8.us.i:                                    ; preds = %for.body3.us.i
+  br i1 undef, label %meshBB1, label %meshBB
+
+for.body3.us.i:                                   ; preds = %meshBB, %for.body3.lr.ph.us.i
+  %indvars.iv.i.SV.phi = phi i64 [ %indvars.iv.next.i, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
+  %storemerge13.us.i.SV.phi = phi i32 [ 0, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
+  %Opq.sa.calc12 = sub i32 undef, 227
+  %0 = add nsw i64 %indvars.iv.i.SV.phi, %indvars.iv8.i.SV.phi26
+  %1 = trunc i64 %0 to i32
+  %mul.i.us.i = mul nsw i32 0, %1
+  %arrayidx5.us.i = getelementptr inbounds double* %u, i64 %indvars.iv.i.SV.phi
+  %2 = load double* %arrayidx5.us.i, align 8
+  %indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1
+  br i1 undef, label %for.inc8.us.i, label %meshBB
+
+for.body3.lr.ph.us.i:                             ; preds = %meshBB1, %meshBB
+  %indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ]
+  %arrayidx.us.i = getelementptr inbounds double* undef, i64 %indvars.iv8.i.SV.phi26
+  %3 = add i64 %indvars.iv8.i.SV.phi26, 1
+  br label %for.body3.us.i
+
+for.inc8.us.i2:                                   ; preds = %meshBB5
+  unreachable
+
+eval_At_times_u.exit:                             ; preds = %meshBB5
+  ret void
+
+meshBB:                                           ; preds = %for.body3.us.i, %for.inc8.us.i
+  %indvars.iv8.i.SV.phi24 = phi i64 [ undef, %for.body3.us.i ], [ %3, %for.inc8.us.i ]
+  %meshStackVariable.phi = phi i32 [ %Opq.sa.calc12, %for.body3.us.i ], [ undef, %for.inc8.us.i ]
+  br i1 undef, label %for.body3.lr.ph.us.i, label %for.body3.us.i
+
+meshBB1:                                          ; preds = %for.inc8.us.i, %entry
+  br label %for.body3.lr.ph.us.i
+
+meshBB5:                                          ; preds = %entry
+  br i1 undef, label %eval_At_times_u.exit, label %for.inc8.us.i2
+}
diff --git a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
index 4136486..f7a82f6 100644
--- a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
+++ b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   not grep {bitcast i32 1 to i32}
+; RUN:   not grep "bitcast i32 1 to i32"
 ; END.
 ; The setlt wants to use a value that is incremented one more than the dominant
 ; IV.  Don't insert the 1 outside the loop, preventing folding it into the add.
diff --git a/test/Transforms/LoopStrengthReduce/dont_reverse.ll b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
index 4c5db04..d65213d 100644
--- a/test/Transforms/LoopStrengthReduce/dont_reverse.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-reduce -S \
-; RUN:    | grep {icmp eq i2 %lsr.iv.next, %xmp4344}
+; RUN:    | grep "icmp eq i2 %lsr.iv.next, %xmp4344"
 
 ; Don't reverse the iteration if the rhs of the compare is defined
 ; inside the loop.
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
index 2ca6787..5c18809 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
@@ -1,6 +1,6 @@
 ; Check that the index of 'P[outer]' is pulled out of the loop.
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   not grep {getelementptr.*%outer.*%INDVAR}
+; RUN:   not grep "getelementptr.*%outer.*%INDVAR"
 
 target datalayout = "e-p:32:32:32-n8:16:32"
 declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
index 86c4d91..8eb8f05 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
@@ -1,6 +1,6 @@
 ; Check that the index of 'P[outer]' is pulled out of the loop.
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   not grep {getelementptr.*%outer.*%INDVAR}
+; RUN:   not grep "getelementptr.*%outer.*%INDVAR"
 
 target datalayout = "e-p:32:32:32-n32"
 declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/pr2570.ll b/test/Transforms/LoopStrengthReduce/pr2570.ll
index 80efb9f..7b56971 100644
--- a/test/Transforms/LoopStrengthReduce/pr2570.ll
+++ b/test/Transforms/LoopStrengthReduce/pr2570.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep {phi\\>} | count 8
+; RUN: opt < %s -loop-reduce -S | grep "phi\>" | count 8
 ; PR2570
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 59f14fc..0118241 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)}
+; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)"
 
 ; The value of %r is dependent on a polynomial iteration expression.
 
diff --git a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
index 5ed37dd..005e4c6 100644
--- a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
+++ b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   grep {add i32 %indvar630.ui, 1}
+; RUN:   grep "add i32 %indvar630.ui, 1"
 ;
 ; Make sure that the use of the IV outside of the loop (the store) uses the 
 ; post incremented value of the IV, not the preincremented value.  This 
diff --git a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
index 64ef4f9..3405b26 100644
--- a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
+++ b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
@@ -1,10 +1,10 @@
 ; Base should not be i*3, it should be i*2.
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   not grep {mul.*%i, 3}
+; RUN:   not grep "mul.*%i, 3"
 
 ; Indvar should not start at zero:
 ; RUN: opt < %s -loop-reduce -S | \
-; RUN:   not grep {phi i32 .* 0}
+; RUN:   not grep "phi i32 .* 0"
 ; END.
 
 ; mul uint %i, 3
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
index 20f2c2b..9d73d31 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-unswitch -stats -disable-output |& grep "1 loop-unswitch - Number of branches unswitched" | count 1
+; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | grep "1 loop-unswitch - Number of branches unswitched" | count 1
 ; PR 3170
 define i32 @a(i32 %x, i32 %y) nounwind {
 entry:
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index 8389fe4..c1fd588 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -35,11 +35,11 @@
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us5, %.split.split.us
 ; CHECK-NEXT:   %var_val.us2 = load i32* %var
 ; CHECK-NEXT:   switch i32 2, label %default.us-lcssa.us-lcssa.us [
-; CHECK-NEXT:     i32 1, label %inc.us3
-; CHECK-NEXT:     i32 2, label %dec.us4
+; CHECK-NEXT:     i32 1, label %inc.us4
+; CHECK-NEXT:     i32 2, label %dec.us3
 ; CHECK-NEXT:   ]
 
-; CHECK:      dec.us4:                                          ; preds = %loop_begin.us1
+; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
 ; CHECK-NEXT:   call void @decf() noreturn nounwind
 ; CHECK-NEXT:   br label %loop_begin.backedge.us5
 
@@ -81,7 +81,7 @@ inc:
 dec:
   call void @decf() noreturn nounwind
   br label %loop_begin
-default:  
+default:
   br label %loop_exit
 loop_exit:
   ret i32 0
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 05d98d5..f3db471 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -19,15 +19,15 @@
 ; CHECK:        switch i32 1, label %second_switch.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
-; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us
-
 ; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
 ; CHECK-NEXT:   switch i32 %d, label %default.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 ; CHECK-NEXT:   ]
 
+; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin
 
@@ -73,7 +73,7 @@ inc:
   call void @incf() noreturn nounwind
   br label %loop_begin
 
-default:  
+default:
   br label %loop_begin
 
 loop_exit:
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 1b186d6..2708996 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -25,14 +25,14 @@
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us.us [
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
-; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us.us
-
 ; CHECK:      second_switch.us.us:                              ; preds = %loop_begin.us.us
 ; CHECK-NEXT:   switch i32 1, label %default.us.us [
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
+; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us.us
+
 ; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin.us
 
@@ -41,10 +41,6 @@
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
-; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us
-
 ; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
 ; CHECK-NEXT:   switch i32 %d, label %default.us [
 ; CHECK-NEXT:     i32 1, label %second_switch.us.inc.us_crit_edge
@@ -53,6 +49,10 @@
 ; CHECK:      second_switch.us.inc.us_crit_edge:                ; preds = %second_switch.us
 ; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
 
+; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
 ; CHECK-NEXT:   %3 = icmp eq i32 %d, 1
 ; CHECK-NEXT:   br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
@@ -65,21 +65,21 @@
 
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us6, %.split.split.us
 ; CHECK-NEXT:   %var_val.us2 = load i32* %var
-; CHECK-NEXT:   switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT:   switch i32 %c, label %second_switch.us3 [
 ; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge.us
 ; CHECK-NEXT:   ]
 
-; CHECK:      inc.us3:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us6
-
-; CHECK:      second_switch.us4:                                ; preds = %loop_begin.us1
+; CHECK:      second_switch.us3:                                ; preds = %loop_begin.us1
 ; CHECK-NEXT:   switch i32 1, label %default.us5 [
-; CHECK-NEXT:     i32 1, label %inc.us3
+; CHECK-NEXT:     i32 1, label %inc.us4
 ; CHECK-NEXT:   ]
 
+; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us6
+
 ; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
-; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4
 
 ; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin
@@ -127,7 +127,7 @@ inc:
   call void @incf() noreturn nounwind
   br label %loop_begin
 
-default:  
+default:
   br label %loop_begin
 
 loop_exit:
diff --git a/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll b/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
new file mode 100644
index 0000000..4c63a56
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; PR12887
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @func() noreturn nounwind uwtable {
+entry:
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  %1 = load i32* @b, align 4
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ]
+  %conv = sext i8 %d.0 to i32
+  %cond = select i1 %tobool, i32 0, i32 %conv
+  %conv11 = zext i8 %d.0 to i32
+  %add = add i32 %1, %conv11
+  %conv2 = trunc i32 %add to i8
+  br label %while.body
+}
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index cdfa0f3..cc77d3c 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -1,10 +1,99 @@
-; RUN: opt < %s -lowerswitch -S > %t
-; RUN: grep slt %t | count 10
-; RUN: grep ule %t | count 3
-; RUN: grep eq  %t | count 9
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+; We have switch on input.
+; On output we should got binary comparison tree. Check that all is fine.
+
+;CHECK:      entry:
+;CHECK-NEXT:   br label %NodeBlock37
+
+;CHECK:      NodeBlock37:                                      ; preds = %entry
+;CHECK-NEXT:   %Pivot38 = icmp ult i32 %tmp158, 11
+;CHECK-NEXT:   br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
+
+;CHECK:      NodeBlock35:                                      ; preds = %NodeBlock37
+;CHECK-NEXT:   %Pivot36 = icmp ult i32 %tmp158, 14
+;CHECK-NEXT:   br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
+
+;CHECK:      NodeBlock33:                                      ; preds = %NodeBlock35
+;CHECK-NEXT:   %Pivot34 = icmp ult i32 %tmp158, 15
+;CHECK-NEXT:   br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
+
+;CHECK:      NodeBlock31:                                      ; preds = %NodeBlock33
+;CHECK-NEXT:   %Pivot32 = icmp ult i32 %tmp158, -6
+;CHECK-NEXT:   br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
+
+;CHECK:      LeafBlock29:                                      ; preds = %NodeBlock31
+;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT:   %SwitchLeaf30 = icmp ule i32 %tmp158.off, 4
+;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb338, label %NewDefault
+
+;CHECK:      LeafBlock27:                                      ; preds = %NodeBlock31
+;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb334, label %NewDefault
+
+;CHECK:      LeafBlock25:                                      ; preds = %NodeBlock33
+;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 14
+;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb332, label %NewDefault
+
+;CHECK:      NodeBlock23:                                      ; preds = %NodeBlock35
+;CHECK-NEXT:   %Pivot24 = icmp ult i32 %tmp158, 12
+;CHECK-NEXT:   br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
+
+;CHECK:      NodeBlock21:                                      ; preds = %NodeBlock23
+;CHECK-NEXT:   %Pivot22 = icmp ult i32 %tmp158, 13
+;CHECK-NEXT:   br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
+
+;CHECK:      LeafBlock19:                                      ; preds = %NodeBlock21
+;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 13
+;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb330, label %NewDefault
+
+;CHECK:      LeafBlock17:                                      ; preds = %NodeBlock21
+;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 12
+;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb328, label %NewDefault
+
+;CHECK:      LeafBlock15:                                      ; preds = %NodeBlock23
+;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 11
+;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb326, label %NewDefault
+
+;CHECK:      NodeBlock13:                                      ; preds = %NodeBlock37
+;CHECK-NEXT:   %Pivot14 = icmp ult i32 %tmp158, 8
+;CHECK-NEXT:   br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
+
+;CHECK:      NodeBlock11:                                      ; preds = %NodeBlock13
+;CHECK-NEXT:   %Pivot12 = icmp ult i32 %tmp158, 9
+;CHECK-NEXT:   br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
+
+;CHECK:      NodeBlock9:                                       ; preds = %NodeBlock11
+;CHECK-NEXT:   %Pivot10 = icmp ult i32 %tmp158, 10
+;CHECK-NEXT:   br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
+
+;CHECK:      LeafBlock7:                                       ; preds = %NodeBlock9
+;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 10
+;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb324, label %NewDefault
+
+;CHECK:      LeafBlock5:                                       ; preds = %NodeBlock9
+;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 9
+;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb322, label %NewDefault
+
+;CHECK:      LeafBlock3:                                       ; preds = %NodeBlock11
+;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 8
+;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb338, label %NewDefault
+
+;CHECK:      NodeBlock:                                        ; preds = %NodeBlock13
+;CHECK-NEXT:   %Pivot = icmp ult i32 %tmp158, 7
+;CHECK-NEXT:   br i1 %Pivot, label %LeafBlock, label %LeafBlock1
+
+;CHECK:      LeafBlock1:                                       ; preds = %NodeBlock
+;CHECK-NEXT:   %SwitchLeaf2 = icmp eq i32 %tmp158, 7
+;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb, label %NewDefault
+
+;CHECK:      LeafBlock:                                        ; preds = %NodeBlock
+;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158, 6
+;CHECK-NEXT:   br i1 %SwitchLeaf, label %bb338, label %NewDefault
 
 define i32 @main(i32 %tmp158) {
 entry:
+
         switch i32 %tmp158, label %bb336 [
                  i32 -2, label %bb338
                  i32 -3, label %bb338
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index b95ad91..d124be5 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*initialize} | not grep memtmp
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp
 ; PR2077
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 24cf576..61ba3c7 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy."
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 %a = type { i32 }
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 63d0ebf..3fa1628 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -148,3 +148,25 @@ define void @test8() {
 }
 
 declare noalias i8* @malloc(i32)
+
+; rdar://11341081
+%struct.big = type { [50 x i32] }
+
+define void @test9() nounwind uwtable ssp {
+entry:
+; CHECK: test9
+; CHECK: f1
+; CHECK-NOT: memcpy
+; CHECK: f2
+  %b = alloca %struct.big, align 4
+  %tmp = alloca %struct.big, align 4
+  call void @f1(%struct.big* sret %tmp)
+  %0 = bitcast %struct.big* %b to i8*
+  %1 = bitcast %struct.big* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 200, i32 4, i1 false)
+  call void @f2(%struct.big* %b)
+  ret void
+}
+
+declare void @f1(%struct.big* sret)
+declare void @f2(%struct.big*)
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index 8eac7da..1bbb5fe 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy}
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
diff --git a/test/Transforms/MergeFunc/fold-weak.ll b/test/Transforms/MergeFunc/fold-weak.ll
index 23e4d33..4df6e39 100644
--- a/test/Transforms/MergeFunc/fold-weak.ll
+++ b/test/Transforms/MergeFunc/fold-weak.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -mergefunc -S > %t
-; RUN: grep {define weak} %t | count 2
-; RUN: grep {call} %t | count 2
+; RUN: grep "define weak" %t | count 2
+; RUN: grep "call" %t | count 2
 ; XFAIL: *
 
 ; This test is off for a bit as we change this particular sort of folding to
diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll
index 7b2a2fe..fd0baff 100644
--- a/test/Transforms/MergeFunc/phi-speculation1.ll
+++ b/test/Transforms/MergeFunc/phi-speculation1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mergefunc -stats -disable-output |& not grep {functions merged}
+; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged"
 
 define i32 @foo1(i32 %x) {
 entry:
diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll
index f080191..eec8b5c 100644
--- a/test/Transforms/MergeFunc/phi-speculation2.ll
+++ b/test/Transforms/MergeFunc/phi-speculation2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mergefunc -stats -disable-output |& grep {functions merged}
+; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | grep "functions merged"
 
 define i32 @foo1(i32 %x) {
 entry:
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll
index 6954fce..4af079f 100644
--- a/test/Transforms/MergeFunc/vector.ll
+++ b/test/Transforms/MergeFunc/vector.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergefunc -stats -disable-output < %s |& grep {functions merged}
+; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged"
 
 ; This test is checks whether we can merge
 ;   vector<intptr_t>::push_back(0)
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index ba2f778..d9bb3f2 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1871,6 +1871,30 @@ return:                                           ; preds = %if.then, %entry
   ret i8* %retval
 }
 
+; An objc_retain can serve as a may-use for a different pointer.
+; rdar://11931823
+
+; CHECK: define void @test66(
+; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+; CHECK:   tail call void @objc_release(i8* %cond) nounwind
+; CHECK: }
+define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind
+  ret void
+}
+
 declare void @bar(i32 ()*)
 
 ; A few real-world testcases.
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 4ff0596..2922f81 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
 
 declare i8* @objc_retain(i8*)
 declare void @objc_release(i8*)
+declare void @use_pointer(i8*)
 
 @x = external global i8*
 
@@ -57,3 +58,112 @@ entry:
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
 }
+
+; Don't do this if there's a use of the old pointer value between the store
+; and the release.
+
+; CHECK:      define void @test3(i8* %newValue) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x1 = load i8** @x, align 8
+; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
+; CHECK-NEXT:   tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test3(i8* %newValue) {
+entry:
+  %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+  %x1 = load i8** @x, align 8
+  store i8* %newValue, i8** @x, align 8
+  tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
+  tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Like test3, but with an icmp use instead of a call, for good measure.
+
+; CHECK:      define i1 @test4(i8* %newValue, i8* %foo) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x1 = load i8** @x, align 8
+; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
+; CHECK-NEXT:   %t = icmp eq i8* %x1, %foo
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   ret i1 %t
+; CHECK-NEXT: }
+define i1 @test4(i8* %newValue, i8* %foo) {
+entry:
+  %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+  %x1 = load i8** @x, align 8
+  store i8* %newValue, i8** @x, align 8
+  %t = icmp eq i8* %x1, %foo
+  tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+  ret i1 %t
+}
+
+; Do form an objc_storeStrong here, because the use is before the store.
+
+; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
+; CHECK: %t = icmp eq i8* %x1, %foo
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+define i1 @test5(i8* %newValue, i8* %foo) {
+entry:
+  %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+  %x1 = load i8** @x, align 8
+  %t = icmp eq i8* %x1, %foo
+  store i8* %newValue, i8** @x, align 8
+  tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+  ret i1 %t
+}
+
+; Like test5, but the release is before the store.
+
+; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
+; CHECK: %t = icmp eq i8* %x1, %foo
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+define i1 @test6(i8* %newValue, i8* %foo) {
+entry:
+  %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+  %x1 = load i8** @x, align 8
+  tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+  %t = icmp eq i8* %x1, %foo
+  store i8* %newValue, i8** @x, align 8
+  ret i1 %t
+}
+
+; Like test0, but there's no store, so don't form an objc_storeStrong.
+
+;      CHECK: define void @test7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %tmp = load i8** @x, align 8
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test7(i8* %p) {
+entry:
+  %0 = tail call i8* @objc_retain(i8* %p) nounwind
+  %tmp = load i8** @x, align 8
+  tail call void @objc_release(i8* %tmp) nounwind
+  ret void
+}
+
+; Like test0, but there's no retain, so don't form an objc_storeStrong.
+
+;      CHECK: define void @test8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %tmp = load i8** @x, align 8
+; CHECK-NEXT:   store i8* %p, i8** @x, align 8
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test8(i8* %p) {
+entry:
+  %tmp = load i8** @x, align 8
+  store i8* %p, i8** @x, align 8
+  tail call void @objc_release(i8* %tmp) nounwind
+  ret void
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 69fa837..1510ed0 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -4,17 +4,17 @@
 %0 = type opaque
 %1 = type opaque
 %2 = type { i64, i64 }
-%3 = type { i8*, i8* }
 %4 = type opaque
 
 declare %0* @"\01-[NSAttributedString(Terminal) pathAtIndex:effectiveRange:]"(%1*, i8* nocapture, i64, %2*) optsize
 declare i8* @objc_retainAutoreleasedReturnValue(i8*)
-declare i8* @objc_msgSend_fixup(i8*, %3*, ...)
+declare i8* @objc_msgSend_fixup(i8*, i8*, ...)
+declare i8* @objc_msgSend(i8*, i8*, ...)
 declare void @objc_release(i8*)
 declare %2 @NSUnionRange(i64, i64, i64, i64) optsize
 declare i8* @objc_autoreleaseReturnValue(i8*)
 declare i8* @objc_autorelease(i8*)
-declare i8* @objc_msgSend() nonlazybind
+declare i32 @__gxx_personality_sj0(...)
 
 ; Don't get in trouble on bugpointed code.
 
@@ -52,7 +52,7 @@ bb6:                                              ; preds = %bb5, %bb4, %bb4, %b
 ; CHECK: %tmp8 = phi %0* [ %0, %bb ], [ %0, %bb ]
 define void @test1() {
 bb:
-  %tmp = tail call %0* bitcast (i8* ()* @objc_msgSend to %0* ()*)()
+  %tmp = tail call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* ()*)()
   %tmp2 = bitcast %0* %tmp to i8*
   %tmp3 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tmp2) nounwind
   br i1 undef, label %bb7, label %bb7
@@ -61,3 +61,30 @@ bb7:                                              ; preds = %bb6, %bb6, %bb5
   %tmp8 = phi %0* [ %tmp, %bb ], [ %tmp, %bb ]
   unreachable
 }
+
+; When looking for the defining instruction for an objc_retainAutoreleasedReturnValue
+; call, handle the case where it's an invoke in a different basic block.
+; rdar://11714057
+
+; CHECK: define void @_Z6doTestP8NSString() {
+; CHECK: invoke.cont:                                      ; preds = %entry
+; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
+; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+define void @_Z6doTestP8NSString() {
+entry:
+  %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  unreachable
+
+lpad:                                             ; preds = %entry
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  resume { i8*, i32 } undef
+}
+
+!clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
+
+!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
new file mode 100644
index 0000000..08e2dce
--- /dev/null
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+; Handle a retain+release pair entirely contained within a split loop backedge.
+; rdar://11256239
+
+; CHECK: define void @test0
+; CHECK: call i8* @objc_retain(i8* %call) nounwind
+; CHECK: call i8* @objc_retain(i8* %call) nounwind
+; CHECK: call i8* @objc_retain(i8* %cond) nounwind
+; CHECK: call void @objc_release(i8* %call) nounwind
+; CHECK: call void @objc_release(i8* %call) nounwind
+; CHECK: call void @objc_release(i8* %cond) nounwind
+define void @test0() {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.cond
+  %call = invoke i8* @returner()
+          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+invoke.cont:                                      ; preds = %while.body
+  %t0 = call i8* @objc_retain(i8* %call) nounwind
+  %t1 = call i8* @objc_retain(i8* %call) nounwind
+  %call.i1 = invoke i8* @returner()
+          to label %invoke.cont1 unwind label %lpad
+
+invoke.cont1:                                     ; preds = %invoke.cont
+  %cond = select i1 undef, i8* null, i8* %call
+  %t2 = call i8* @objc_retain(i8* %cond) nounwind
+  call void @objc_release(i8* %call) nounwind
+  call void @objc_release(i8* %call) nounwind
+  call void @use_pointer(i8* %cond)
+  call void @objc_release(i8* %cond) nounwind
+  br label %while.body
+
+lpad:                                             ; preds = %invoke.cont, %while.body
+  %t4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  ret void
+}
+
+declare i8* @returner()
+declare i32 @__objc_personality_v0(...)
+declare void @objc_release(i8*)
+declare i8* @objc_retain(i8*)
+declare void @use_pointer(i8*)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/weak-dce.ll b/test/Transforms/ObjCARC/weak-dce.ll
new file mode 100644
index 0000000..f094671
--- /dev/null
+++ b/test/Transforms/ObjCARC/weak-dce.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -basicaa -objc-arc < %s | FileCheck %s
+; rdar://11434915
+
+; Delete the weak calls and replace them with just the net retain.
+
+;      CHECK: define void @test0(i8* %p) {
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: ret void
+
+define void @test0(i8* %p) {
+  %weakBlock = alloca i8*, align 8
+  %tmp7 = call i8* @objc_initWeak(i8** %weakBlock, i8* %p) nounwind
+  %tmp26 = call i8* @objc_loadWeakRetained(i8** %weakBlock) nounwind
+  call void @objc_destroyWeak(i8** %weakBlock) nounwind
+  ret void
+}
+
+;      CHECK: define i8* @test1(i8* %p) {
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: ret i8* %p
+
+define i8* @test1(i8* %p) {
+  %weakBlock = alloca i8*, align 8
+  %tmp7 = call i8* @objc_initWeak(i8** %weakBlock, i8* %p) nounwind
+  %tmp26 = call i8* @objc_loadWeakRetained(i8** %weakBlock) nounwind
+  call void @objc_destroyWeak(i8** %weakBlock) nounwind
+  ret i8* %tmp26
+}
+
+;      CHECK: define i8* @test2(i8* %p, i8* %q) {
+; CHECK-NEXT: call i8* @objc_retain(i8* %q)
+; CHECK-NEXT: ret i8* %q
+
+define i8* @test2(i8* %p, i8* %q) {
+  %weakBlock = alloca i8*, align 8
+  %tmp7 = call i8* @objc_initWeak(i8** %weakBlock, i8* %p) nounwind
+  %tmp19 = call i8* @objc_storeWeak(i8** %weakBlock, i8* %q) nounwind
+  %tmp26 = call i8* @objc_loadWeakRetained(i8** %weakBlock) nounwind
+  call void @objc_destroyWeak(i8** %weakBlock) nounwind
+  ret i8* %tmp26
+}
+
+declare i8* @objc_initWeak(i8**, i8*)
+declare void @objc_destroyWeak(i8**)
+declare i8* @objc_loadWeakRetained(i8**)
+declare i8* @objc_storeWeak(i8** %weakBlock, i8* %q)
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
new file mode 100644
index 0000000..ef9947f
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -0,0 +1,93 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+; XFAIL: *
+
+declare i32 @doo(...)
+
+; PR6627 - This whole nasty sequence should be flattened down to a single
+; 32-bit comparison.
+define void @test2(i8* %arrayidx) nounwind ssp {
+entry:
+  %xx = bitcast i8* %arrayidx to i32*
+  %x1 = load i32* %xx, align 4
+  %tmp = trunc i32 %x1 to i8
+  %conv = zext i8 %tmp to i32
+  %cmp = icmp eq i32 %conv, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK: @test2
+; CHECK: %x1 = load i32* %xx, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
+
+; PR6627 - This should all be flattened down to one compare.  This is the same
+; as test2, except that the initial load is done as an i8 instead of i32, thus
+; requiring widening.
+define void @test2a(i8* %arrayidx) nounwind ssp {
+entry:
+  %x1 = load i8* %arrayidx, align 4
+  %conv = zext i8 %x1 to i32
+  %cmp = icmp eq i32 %conv, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK: @test2a
+; CHECK: %x1 = load i32* {{.*}}, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index e5b2ba4..88ebca0 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -O3 -S %s | FileCheck %s
-; XFAIL: *
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
@@ -24,95 +23,29 @@ define void @test1() nounwind ssp {
 ; CHECK-NEXT: ret void
 }
 
-
-; PR6627 - This whole nasty sequence should be flattened down to a single
-; 32-bit comparison.
-define void @test2(i8* %arrayidx) nounwind ssp {
+; This function exposes a phase ordering problem when InstCombine is
+; turning %add into a bitmask, making it difficult to spot a 0 return value.
+;
+; It it also important that %add is expressed as a multiple of %div so scalar
+; evolution can recognize it.
+define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
 entry:
-  %xx = bitcast i8* %arrayidx to i32*
-  %x1 = load i32* %xx, align 4
-  %tmp = trunc i32 %x1 to i8
-  %conv = zext i8 %tmp to i32
-  %cmp = icmp eq i32 %conv, 127
-  br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:                                    ; preds = %entry
-  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
-  %conv6 = zext i8 %tmp5 to i32
-  %cmp7 = icmp eq i32 %conv6, 69
-  br i1 %cmp7, label %land.lhs.true9, label %if.end
-
-land.lhs.true9:                                   ; preds = %land.lhs.true
-  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
-  %conv14 = zext i8 %tmp13 to i32
-  %cmp15 = icmp eq i32 %conv14, 76
-  br i1 %cmp15, label %land.lhs.true17, label %if.end
-
-land.lhs.true17:                                  ; preds = %land.lhs.true9
-  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
-  %conv22 = zext i8 %tmp21 to i32
-  %cmp23 = icmp eq i32 %conv22, 70
-  br i1 %cmp23, label %if.then, label %if.end
-
-if.then:                                          ; preds = %land.lhs.true17
-  %call25 = call i32 (...)* @doo()
-  br label %if.end
-
-if.end:
-  ret void
+  %div = udiv i32 %a, 4
+  %arrayidx = getelementptr inbounds i32* %p, i64 0
+  store i32 %div, i32* %arrayidx, align 4
+  %add = add i32 %div, %div
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 1
+  store i32 %add, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32* %p, i64 1
+  %0 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %p, i64 0
+  %1 = load i32* %arrayidx3, align 4
+  %mul = mul i32 2, %1
+  %sub = sub i32 %0, %mul
+  ret i32 %sub
 
 ; CHECK: @test2
-; CHECK: %x1 = load i32* %xx, align 4
-; CHECK-NEXT: icmp eq i32 %x1, 1179403647
-; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+; CHECK: %div = lshr i32 %a, 2
+; CHECK: %add = shl nuw nsw i32 %div, 1
+; CHECK: ret i32 0
 }
-
-declare i32 @doo(...)
-
-; PR6627 - This should all be flattened down to one compare.  This is the same
-; as test2, except that the initial load is done as an i8 instead of i32, thus
-; requiring widening.
-define void @test2a(i8* %arrayidx) nounwind ssp {
-entry:
-  %x1 = load i8* %arrayidx, align 4
-  %conv = zext i8 %x1 to i32
-  %cmp = icmp eq i32 %conv, 127
-  br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:                                    ; preds = %entry
-  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
-  %conv6 = zext i8 %tmp5 to i32
-  %cmp7 = icmp eq i32 %conv6, 69
-  br i1 %cmp7, label %land.lhs.true9, label %if.end
-
-land.lhs.true9:                                   ; preds = %land.lhs.true
-  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
-  %conv14 = zext i8 %tmp13 to i32
-  %cmp15 = icmp eq i32 %conv14, 76
-  br i1 %cmp15, label %land.lhs.true17, label %if.end
-
-land.lhs.true17:                                  ; preds = %land.lhs.true9
-  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
-  %conv22 = zext i8 %tmp21 to i32
-  %cmp23 = icmp eq i32 %conv22, 70
-  br i1 %cmp23, label %if.then, label %if.end
-
-if.then:                                          ; preds = %land.lhs.true17
-  %call25 = call i32 (...)* @doo()
-  br label %if.end
-
-if.end:
-  ret void
-
-; CHECK: @test2a
-; CHECK: %x1 = load i32* {{.*}}, align 4
-; CHECK-NEXT: icmp eq i32 %x1, 1179403647
-; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
-}
-
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll
new file mode 100644
index 0000000..c731280
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -0,0 +1,64 @@
+; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+;
+; This file contains phase ordering tests for scalar evolution.
+; Test that the standard passes don't obfuscate the IR so scalar evolution can't
+; recognize expressions.
+
+; CHECK: test1
+; The loop body contains two increments by %div.
+; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
+; CHECK: -->  {%p,+,(2 * (%d /u 4) * sizeof(i32))}
+define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: test1a
+; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
+; CHECK: -->  {%p,+,(2 * (%d /u 2) * sizeof(i32))}
+define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 2
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll b/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll
deleted file mode 100644
index 64aba46..0000000
--- a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -prune-eh -S | grep invoke
-
-declare void @External()
-
-define void @foo() {
-	invoke void @External( )
-			to label %Cont unwind label %Cont
-Cont:		; preds = %0, %0
-        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
-                 cleanup
-	ret void
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/PruneEH/simplenoreturntest.ll b/test/Transforms/PruneEH/simplenoreturntest.ll
index 61e2f15..ec5d100 100644
--- a/test/Transforms/PruneEH/simplenoreturntest.ll
+++ b/test/Transforms/PruneEH/simplenoreturntest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -prune-eh -S | not grep {ret i32}
+; RUN: opt < %s -prune-eh -S | not grep "ret i32"
 
 declare void @noreturn() noreturn
 
diff --git a/test/Transforms/Reassociate/2005-08-24-Crash.ll b/test/Transforms/Reassociate/2005-08-24-Crash.ll
deleted file mode 100644
index 9864de4..0000000
--- a/test/Transforms/Reassociate/2005-08-24-Crash.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -reassociate -disable-output
-
-define void @test(i32 %a, i32 %b, i32 %c, i32 %d) {
-	%tmp.2 = xor i32 %a, %b		; <i32> [#uses=1]
-	%tmp.5 = xor i32 %c, %d		; <i32> [#uses=1]
-	%tmp.6 = xor i32 %tmp.2, %tmp.5		; <i32> [#uses=1]
-	%tmp.9 = xor i32 %c, %a		; <i32> [#uses=1]
-	%tmp.12 = xor i32 %b, %d		; <i32> [#uses=1]
-	%tmp.13 = xor i32 %tmp.9, %tmp.12		; <i32> [#uses=1]
-	%tmp.16 = xor i32 %tmp.6, %tmp.13		; <i32> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
index 33e44d4..f66148b 100644
--- a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
+++ b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -reassociate -instcombine -S |\
-; RUN:   grep {ret i32 0}
+; RUN:   grep "ret i32 0"
 
 define i32 @f(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 	%tmp.2 = add i32 %a4, %a3		; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
new file mode 100644
index 0000000..2f5a53e
--- /dev/null
+++ b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; PR12169
+; PR12764
+; XFAIL: *
+; Transform disabled until PR13021 is fixed.
+
+define i64 @f(i64 %x0) {
+; CHECK: @f
+; CHECK-NEXT: mul i64 %x0, 208
+; CHECK-NEXT: add i64 %{{.*}}, 1617
+; CHECK-NEXT: ret i64
+  %t0 = add i64 %x0, 1
+  %t1 = add i64 %x0, 2
+  %t2 = add i64 %x0, 3
+  %t3 = add i64 %x0, 4
+  %t4 = add i64 %x0, 5
+  %t5 = add i64 %x0, 6
+  %t6 = add i64 %x0, 7
+  %t7 = add i64 %x0, 8
+  %t8 = add i64 %x0, 9
+  %t9 = add i64 %x0, 10
+  %t10 = add i64 %x0, 11
+  %t11 = add i64 %x0, 12
+  %t12 = add i64 %x0, 13
+  %t13 = add i64 %x0, 14
+  %t14 = add i64 %x0, 15
+  %t15 = add i64 %x0, 16
+  %t16 = add i64 %x0, 17
+  %t17 = add i64 %x0, 18
+  %t18 = add i64 %t17, %t0
+  %t19 = add i64 %t18, %t1
+  %t20 = add i64 %t19, %t2
+  %t21 = add i64 %t20, %t3
+  %t22 = add i64 %t21, %t4
+  %t23 = add i64 %t22, %t5
+  %t24 = add i64 %t23, %t6
+  %t25 = add i64 %t24, %t7
+  %t26 = add i64 %t25, %t8
+  %t27 = add i64 %t26, %t9
+  %t28 = add i64 %t27, %t10
+  %t29 = add i64 %t28, %t11
+  %t30 = add i64 %t29, %t12
+  %t31 = add i64 %t30, %t13
+  %t32 = add i64 %t31, %t14
+  %t33 = add i64 %t32, %t15
+  %t34 = add i64 %t33, %t16
+  %t35 = add i64 %t34, %x0
+  %t36 = add i64 %t0, %t1
+  %t37 = add i64 %t36, %t2
+  %t38 = add i64 %t37, %t3
+  %t39 = add i64 %t38, %t4
+  %t40 = add i64 %t39, %t5
+  %t41 = add i64 %t40, %t6
+  %t42 = add i64 %t41, %t7
+  %t43 = add i64 %t42, %t8
+  %t44 = add i64 %t43, %t9
+  %t45 = add i64 %t44, %t10
+  %t46 = add i64 %t45, %t11
+  %t47 = add i64 %t46, %t12
+  %t48 = add i64 %t47, %t13
+  %t49 = add i64 %t48, %t14
+  %t50 = add i64 %t49, %t15
+  %t51 = add i64 %t50, %t16
+  %t52 = add i64 %t51, %t17
+  %t53 = add i64 %t52, %t18
+  %t54 = add i64 %t53, %t19
+  %t55 = add i64 %t54, %t20
+  %t56 = add i64 %t55, %t21
+  %t57 = add i64 %t56, %t22
+  %t58 = add i64 %t57, %t23
+  %t59 = add i64 %t58, %t24
+  %t60 = add i64 %t59, %t25
+  %t61 = add i64 %t60, %t26
+  %t62 = add i64 %t61, %t27
+  %t63 = add i64 %t62, %t28
+  %t64 = add i64 %t63, %t29
+  %t65 = add i64 %t64, %t30
+  %t66 = add i64 %t65, %t31
+  %t67 = add i64 %t66, %t32
+  %t68 = add i64 %t67, %t33
+  %t69 = add i64 %t68, %t34
+  %t70 = add i64 %t69, %t35
+  %t71 = add i64 %t70, %x0
+  ret i64 %t71
+}
diff --git a/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll b/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll
new file mode 100644
index 0000000..6e62a28
--- /dev/null
+++ b/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -reassociate -disable-output
+; PR13041
+
+define void @foo() {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %b.0 = phi i32 [ undef, %entry ], [ %sub2, %while.body ]
+  %c.0 = phi i32 [ undef, %entry ], [ %sub3, %while.body ]
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  %sub = sub nsw i32 0, %b.0
+  %sub2 = sub nsw i32 %sub, %c.0
+  %sub3 = sub nsw i32 0, %c.0
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
diff --git a/test/Transforms/Reassociate/absorption.ll b/test/Transforms/Reassociate/absorption.ll
new file mode 100644
index 0000000..2ccc2b5
--- /dev/null
+++ b/test/Transforms/Reassociate/absorption.ll
@@ -0,0 +1,11 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+
+; Check that if constants combine to an absorbing value then the expression is
+; evaluated as the absorbing value.
+define i8 @foo(i8 %x) {
+  %tmp1 = or i8 %x, 127
+  %tmp2 = or i8 %tmp1, 128
+  ret i8 %tmp2
+; CHECK: @foo
+; CHECK: ret i8 -1
+}
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index 7a81942..ce586e1 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -67,3 +67,80 @@ _33:                                              ; preds = %_33, %_
   %tmp367 = add i32 %tmp365, %tmp366
   br label %_33
 }
+
+define void @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+  %tmp.2 = xor i32 %a, %b		; <i32> [#uses=1]
+  %tmp.5 = xor i32 %c, %d		; <i32> [#uses=1]
+  %tmp.6 = xor i32 %tmp.2, %tmp.5		; <i32> [#uses=1]
+  %tmp.9 = xor i32 %c, %a		; <i32> [#uses=1]
+  %tmp.12 = xor i32 %b, %d		; <i32> [#uses=1]
+  %tmp.13 = xor i32 %tmp.9, %tmp.12		; <i32> [#uses=1]
+  %tmp.16 = xor i32 %tmp.6, %tmp.13		; <i32> [#uses=0]
+  ret void
+}
+
+define i128 @foo() {
+  %mul = mul i128 0, 0
+  ret i128 %mul
+}
+
+define void @infinite_loop() {
+entry:
+  br label %loop
+loop:
+  %x = phi i32 [undef, %entry], [%x, %loop]
+  %dead = add i32 %x, 0
+  br label %loop
+unreachable1:
+  %y1 = add i32 %y1, 0
+  %z1 = add i32 %y1, 0
+  ret void
+unreachable2:
+  %y2 = add i32 %y2, 0
+  %z2 = add i32 %y2, %y2
+  ret void
+unreachable3:
+  %y3 = add i32 %y3, %y3
+  %z3 = add i32 %y3, 0
+  ret void
+unreachable4:
+  %y4 = add i32 %y4, %y4
+  %z4 = add i32 %y4, %y4
+  ret void
+}
+
+; PR13185
+define void @pr13185(i16 %p) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %x.0 = phi i32 [ undef, %entry ], [ %conv, %for.cond ]
+  %conv = zext i16 %p to i32
+  br label %for.cond
+}
+
+; PR12963
+@a = external global i8
+define i8 @f0(i8 %x) {
+  %t0 = load i8* @a
+  %t1 = mul i8 %x, %x
+  %t2 = mul i8 %t1, %t1
+  %t3 = mul i8 %t2, %t2
+  %t4 = mul i8 %t3, %x
+  %t5 = mul i8 %t4, %t4
+  %t6 = mul i8 %t5, %x
+  %t7 = mul i8 %t6, %t0
+  ret i8 %t7
+}
+
+define i32 @sozefx_(i32 %x, i32 %y) {
+  %t0 = sub i32 %x, %x
+  %t1 = mul i32 %t0, %t0
+  %t2 = mul i32 %x, %t0
+  %t3 = mul i32 %t1, %t1
+  %t4 = add i32 %t2, %t3
+  %t5 = mul i32 %x, %y
+  %t6 = add i32 %t4, %t5
+  ret i32 %t6
+}
diff --git a/test/Transforms/Reassociate/fp-commute.ll b/test/Transforms/Reassociate/fp-commute.ll
new file mode 100644
index 0000000..025689b
--- /dev/null
+++ b/test/Transforms/Reassociate/fp-commute.ll
@@ -0,0 +1,18 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+target triple = "armv7-apple-ios"
+
+declare void @use(float)
+
+; CHECK: test
+define void @test(float %x, float %y) {
+entry:
+; CHECK: fmul float %x, %y
+; CHECK: fmul float %x, %y
+  %0 = fmul float %x, %y
+  %1 = fmul float %y, %x
+  %2 = fsub float %0, %1
+  call void @use(float %0)
+  call void @use(float %2)
+  ret void
+}
diff --git a/test/Transforms/Reassociate/mightymul.ll b/test/Transforms/Reassociate/mightymul.ll
new file mode 100644
index 0000000..cfbc485
--- /dev/null
+++ b/test/Transforms/Reassociate/mightymul.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -reassociate
+; PR13021
+
+define i32 @foo(i32 %x) {
+  %t0 = mul i32 %x, %x
+  %t1 = mul i32 %t0, %t0
+  %t2 = mul i32 %t1, %t1
+  %t3 = mul i32 %t2, %t2
+  %t4 = mul i32 %t3, %t3
+  %t5 = mul i32 %t4, %t4
+  %t6 = mul i32 %t5, %t5
+  %t7 = mul i32 %t6, %t6
+  %t8 = mul i32 %t7, %t7
+  %t9 = mul i32 %t8, %t8
+  %t10 = mul i32 %t9, %t9
+  %t11 = mul i32 %t10, %t10
+  %t12 = mul i32 %t11, %t11
+  %t13 = mul i32 %t12, %t12
+  %t14 = mul i32 %t13, %t13
+  %t15 = mul i32 %t14, %t14
+  %t16 = mul i32 %t15, %t15
+  %t17 = mul i32 %t16, %t16
+  %t18 = mul i32 %t17, %t17
+  %t19 = mul i32 %t18, %t18
+  %t20 = mul i32 %t19, %t19
+  %t21 = mul i32 %t20, %t20
+  %t22 = mul i32 %t21, %t21
+  %t23 = mul i32 %t22, %t22
+  %t24 = mul i32 %t23, %t23
+  %t25 = mul i32 %t24, %t24
+  %t26 = mul i32 %t25, %t25
+  %t27 = mul i32 %t26, %t26
+  %t28 = mul i32 %t27, %t27
+  ret i32 %t28
+}
diff --git a/test/Transforms/Reassociate/mulfactor.ll b/test/Transforms/Reassociate/mulfactor.ll
index f279727..6c099b4 100644
--- a/test/Transforms/Reassociate/mulfactor.ll
+++ b/test/Transforms/Reassociate/mulfactor.ll
@@ -1,14 +1,134 @@
-; RUN: opt < %s -reassociate -instcombine -S | grep mul | count 2
+; RUN: opt < %s -reassociate -S | FileCheck %s
 
-; This should have exactly 2 multiplies when we're done.
+define i32 @test1(i32 %a, i32 %b) {
+; CHECK: @test1
+; CHECK: mul i32 %a, %a
+; CHECK-NEXT: mul i32 %a, 2
+; CHECK-NEXT: add
+; CHECK-NEXT: mul
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
 
-define i32 @f(i32 %a, i32 %b) {
-	%tmp.2 = mul i32 %a, %a		; <i32> [#uses=1]
-	%tmp.5 = shl i32 %a, 1		; <i32> [#uses=1]
-	%tmp.6 = mul i32 %tmp.5, %b		; <i32> [#uses=1]
-	%tmp.10 = mul i32 %b, %b		; <i32> [#uses=1]
-	%tmp.7 = add i32 %tmp.6, %tmp.2		; <i32> [#uses=1]
-	%tmp.11 = add i32 %tmp.7, %tmp.10		; <i32> [#uses=1]
+entry:
+	%tmp.2 = mul i32 %a, %a
+	%tmp.5 = shl i32 %a, 1
+	%tmp.6 = mul i32 %tmp.5, %b
+	%tmp.10 = mul i32 %b, %b
+	%tmp.7 = add i32 %tmp.6, %tmp.2
+	%tmp.11 = add i32 %tmp.7, %tmp.10
 	ret i32 %tmp.11
 }
 
+define i32 @test2(i32 %t) {
+; CHECK: @test2
+; CHECK: mul
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+
+entry:
+	%a = mul i32 %t, 6
+	%b = mul i32 %t, 36
+	%c = add i32 %b, 15
+	%d = add i32 %c, %a
+	ret i32 %d
+}
+
+define i32 @test3(i32 %x) {
+; (x^8)
+; CHECK: @test3
+; CHECK: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+
+entry:
+  %a = mul i32 %x, %x
+  %b = mul i32 %a, %x
+  %c = mul i32 %b, %x
+  %d = mul i32 %c, %x
+  %e = mul i32 %d, %x
+  %f = mul i32 %e, %x
+  %g = mul i32 %f, %x
+  ret i32 %g
+}
+
+define i32 @test4(i32 %x) {
+; (x^7)
+; CHECK: @test4
+; CHECK: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+
+entry:
+  %a = mul i32 %x, %x
+  %b = mul i32 %a, %x
+  %c = mul i32 %b, %x
+  %d = mul i32 %c, %x
+  %e = mul i32 %d, %x
+  %f = mul i32 %e, %x
+  ret i32 %f
+}
+
+define i32 @test5(i32 %x, i32 %y) {
+; (x^4) * (y^2)
+; CHECK: @test5
+; CHECK: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+
+entry:
+  %a = mul i32 %x, %y
+  %b = mul i32 %a, %y
+  %c = mul i32 %b, %x
+  %d = mul i32 %c, %x
+  %e = mul i32 %d, %x
+  ret i32 %e
+}
+
+define i32 @test6(i32 %x, i32 %y, i32 %z) {
+; (x^5) * (y^3) * z
+; CHECK: @test6
+; CHECK: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+
+entry:
+  %a = mul i32 %x, %y
+  %b = mul i32 %a, %x
+  %c = mul i32 %b, %y
+  %d = mul i32 %c, %x
+  %e = mul i32 %d, %y
+  %f = mul i32 %e, %x
+  %g = mul i32 %f, %z
+  %h = mul i32 %g, %x
+  ret i32 %h
+}
+
+define i32 @test7(i32 %x, i32 %y, i32 %z) {
+; (x^4) * (y^3) * (z^2)
+; CHECK: @test7
+; CHECK: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+
+entry:
+  %a = mul i32 %y, %x
+  %b = mul i32 %a, %z
+  %c = mul i32 %b, %z
+  %d = mul i32 %c, %x
+  %e = mul i32 %d, %y
+  %f = mul i32 %e, %y
+  %g = mul i32 %f, %x
+  %h = mul i32 %g, %x
+  ret i32 %h
+}
diff --git a/test/Transforms/Reassociate/mulfactor2.ll b/test/Transforms/Reassociate/mulfactor2.ll
deleted file mode 100644
index 8116554..0000000
--- a/test/Transforms/Reassociate/mulfactor2.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; This should turn into one multiply and one add.
-
-; RUN: opt < %s -instcombine -reassociate -instcombine -S > %t
-; RUN: grep mul %t | count 1
-; RUN: grep add %t | count 1
-
-define i32 @main(i32 %t) {
-	%tmp.3 = mul i32 %t, 12		; <i32> [#uses=1]
-	%tmp.4 = add i32 %tmp.3, 5		; <i32> [#uses=1]
-	%tmp.6 = mul i32 %t, 6		; <i32> [#uses=1]
-	%tmp.8 = mul i32 %tmp.4, 3		; <i32> [#uses=1]
-	%tmp.9 = add i32 %tmp.8, %tmp.6		; <i32> [#uses=1]
-	ret i32 %tmp.9
-}
-
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
new file mode 100644
index 0000000..7466d2e
--- /dev/null
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
+; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+; CHECK: @multistep1
+  %t0 = mul i64 %a, %b
+  %t1 = mul i64 %a, %t0 ; a*(a*b)
+  %t2 = mul i64 %a, %c
+  %t3 = mul i64 %a, %t2 ; a*(a*c)
+  %t4 = add i64 %t1, %t3
+; CHECK-NEXT: add i64 %c, %b
+; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: ret
+  ret i64 %t4
+}
+
+define i64 @multistep2(i64 %a, i64 %b, i64 %c, i64 %d) {
+; Check that a*b+a*c+d is turned into a*(b+c)+d.
+; CHECK: @multistep2
+  %t0 = mul i64 %a, %b
+  %t1 = mul i64 %a, %c
+  %t2 = add i64 %t1, %d ; a*c+d
+  %t3 = add i64 %t0, %t2 ; a*b+(a*c+d)
+; CHECK-NEXT: add i64 %c, %b
+; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: add i64 %tmp{{.*}}, %d
+; CHECK-NEXT: ret
+  ret i64 %t3
+}
+
diff --git a/test/Transforms/Reassociate/no-op.ll b/test/Transforms/Reassociate/no-op.ll
new file mode 100644
index 0000000..0444cf0
--- /dev/null
+++ b/test/Transforms/Reassociate/no-op.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; When there is nothing to do, or not much to do, check that reassociate leaves
+; things alone.
+
+declare void @use(i32)
+
+define void @test1(i32 %a, i32 %b) {
+; Shouldn't change or move any of the add instructions.  Should commute but
+; otherwise not change or move any of the mul instructions.
+; CHECK: @test1
+  %a0 = add nsw i32 %a, 1
+; CHECK-NEXT: %a0 = add nsw i32 %a, 1
+  %m0 = mul nsw i32 3, %a
+; CHECK-NEXT: %m0 = mul nsw i32 %a, 3
+  %a1 = add nsw i32 %a0, %b
+; CHECK-NEXT: %a1 = add nsw i32 %a0, %b
+  %m1 = mul nsw i32 %b, %m0
+; CHECK-NEXT: %m1 = mul nsw i32 %m0, %b
+  call void @use(i32 %a1)
+; CHECK-NEXT: call void @use
+  call void @use(i32 %m1)
+  ret void
+}
+
+define void @test2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; The initial add doesn't change so should not lose the nsw flag.
+; CHECK: @test2
+  %a0 = add nsw i32 %b, %a
+; CHECK-NEXT: %a0 = add nsw i32 %b, %a
+  %a1 = add nsw i32 %a0, %d
+; CHECK-NEXT: %a1 = add i32 %a0, %c
+  %a2 = add nsw i32 %a1, %c
+; CHECK-NEXT: %a2 = add i32 %a1, %d
+  call void @use(i32 %a2)
+; CHECK-NEXT: call void @use
+  ret void
+}
diff --git a/test/Transforms/Reassociate/repeats.ll b/test/Transforms/Reassociate/repeats.ll
new file mode 100644
index 0000000..6a02047
--- /dev/null
+++ b/test/Transforms/Reassociate/repeats.ll
@@ -0,0 +1,252 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; Tests involving repeated operations on the same value.
+
+define i8 @nilpotent(i8 %x) {
+; CHECK: @nilpotent
+  %tmp = xor i8 %x, %x
+  ret i8 %tmp
+; CHECK: ret i8 0
+}
+
+define i2 @idempotent(i2 %x) {
+; CHECK: @idempotent
+  %tmp1 = and i2 %x, %x
+  %tmp2 = and i2 %tmp1, %x
+  %tmp3 = and i2 %tmp2, %x
+  ret i2 %tmp3
+; CHECK: ret i2 %x
+}
+
+define i2 @add(i2 %x) {
+; CHECK: @add
+  %tmp1 = add i2 %x, %x
+  %tmp2 = add i2 %tmp1, %x
+  %tmp3 = add i2 %tmp2, %x
+  ret i2 %tmp3
+; CHECK: ret i2 0
+}
+
+define i2 @cst_add() {
+; CHECK: @cst_add
+  %tmp1 = add i2 1, 1
+  %tmp2 = add i2 %tmp1, 1
+  ret i2 %tmp2
+; CHECK: ret i2 -1
+}
+
+define i8 @cst_mul() {
+; CHECK: @cst_mul
+  %tmp1 = mul i8 3, 3
+  %tmp2 = mul i8 %tmp1, 3
+  %tmp3 = mul i8 %tmp2, 3
+  %tmp4 = mul i8 %tmp3, 3
+  ret i8 %tmp4
+; CHECK: ret i8 -13
+}
+
+define i3 @foo3x5(i3 %x) {
+; Can be done with two multiplies.
+; CHECK: @foo3x5
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i3 %x, %x
+  %tmp2 = mul i3 %tmp1, %x
+  %tmp3 = mul i3 %tmp2, %x
+  %tmp4 = mul i3 %tmp3, %x
+  ret i3 %tmp4
+}
+
+define i3 @foo3x6(i3 %x) {
+; Can be done with two multiplies.
+; CHECK: @foo3x6
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i3 %x, %x
+  %tmp2 = mul i3 %tmp1, %x
+  %tmp3 = mul i3 %tmp2, %x
+  %tmp4 = mul i3 %tmp3, %x
+  %tmp5 = mul i3 %tmp4, %x
+  ret i3 %tmp5
+}
+
+define i3 @foo3x7(i3 %x) {
+; Can be done with two multiplies.
+; CHECK: @foo3x7
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i3 %x, %x
+  %tmp2 = mul i3 %tmp1, %x
+  %tmp3 = mul i3 %tmp2, %x
+  %tmp4 = mul i3 %tmp3, %x
+  %tmp5 = mul i3 %tmp4, %x
+  %tmp6 = mul i3 %tmp5, %x
+  ret i3 %tmp6
+}
+
+define i4 @foo4x8(i4 %x) {
+; Can be done with two multiplies.
+; CHECK: @foo4x8
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  ret i4 %tmp7
+}
+
+define i4 @foo4x9(i4 %x) {
+; Can be done with three multiplies.
+; CHECK: @foo4x9
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  ret i4 %tmp8
+}
+
+define i4 @foo4x10(i4 %x) {
+; Can be done with three multiplies.
+; CHECK: @foo4x10
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  ret i4 %tmp9
+}
+
+define i4 @foo4x11(i4 %x) {
+; Can be done with four multiplies.
+; CHECK: @foo4x11
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  %tmp10 = mul i4 %tmp9, %x
+  ret i4 %tmp10
+}
+
+define i4 @foo4x12(i4 %x) {
+; Can be done with two multiplies.
+; CHECK: @foo4x12
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  %tmp10 = mul i4 %tmp9, %x
+  %tmp11 = mul i4 %tmp10, %x
+  ret i4 %tmp11
+}
+
+define i4 @foo4x13(i4 %x) {
+; Can be done with three multiplies.
+; CHECK: @foo4x13
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  %tmp10 = mul i4 %tmp9, %x
+  %tmp11 = mul i4 %tmp10, %x
+  %tmp12 = mul i4 %tmp11, %x
+  ret i4 %tmp12
+}
+
+define i4 @foo4x14(i4 %x) {
+; Can be done with three multiplies.
+; CHECK: @foo4x14
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  %tmp10 = mul i4 %tmp9, %x
+  %tmp11 = mul i4 %tmp10, %x
+  %tmp12 = mul i4 %tmp11, %x
+  %tmp13 = mul i4 %tmp12, %x
+  ret i4 %tmp13
+}
+
+define i4 @foo4x15(i4 %x) {
+; Can be done with four multiplies.
+; CHECK: @foo4x15
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: mul
+; CHECK-NEXT: ret
+  %tmp1 = mul i4 %x, %x
+  %tmp2 = mul i4 %tmp1, %x
+  %tmp3 = mul i4 %tmp2, %x
+  %tmp4 = mul i4 %tmp3, %x
+  %tmp5 = mul i4 %tmp4, %x
+  %tmp6 = mul i4 %tmp5, %x
+  %tmp7 = mul i4 %tmp6, %x
+  %tmp8 = mul i4 %tmp7, %x
+  %tmp9 = mul i4 %tmp8, %x
+  %tmp10 = mul i4 %tmp9, %x
+  %tmp11 = mul i4 %tmp10, %x
+  %tmp12 = mul i4 %tmp11, %x
+  %tmp13 = mul i4 %tmp12, %x
+  %tmp14 = mul i4 %tmp13, %x
+  ret i4 %tmp14
+}
diff --git a/test/Transforms/Reassociate/shifttest.ll b/test/Transforms/Reassociate/shifttest.ll
index 8b2cbc9..d9a5336 100644
--- a/test/Transforms/Reassociate/shifttest.ll
+++ b/test/Transforms/Reassociate/shifttest.ll
@@ -1,7 +1,7 @@
 ; With shl->mul reassociation, we can see that this is (shl A, 9) * A
 ;
 ; RUN: opt < %s -reassociate -instcombine -S |\
-; RUN:    grep {shl .*, 9}
+; RUN:    grep "shl .*, 9"
 
 define i32 @test(i32 %A, i32 %B) {
 	%X = shl i32 %A, 5		; <i32> [#uses=1]
diff --git a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
index 4adfde3..c847b4e 100644
--- a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
+++ b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sccp -S | grep {ret i32 1}
+; RUN: opt < %s -sccp -S | grep "ret i32 1"
 
 ; This function definitely returns 1, even if we don't know the direction
 ; of the branch.
diff --git a/test/Transforms/SCCP/2006-12-19-UndefBug.ll b/test/Transforms/SCCP/2006-12-19-UndefBug.ll
index ec69ce0..ede1a32 100644
--- a/test/Transforms/SCCP/2006-12-19-UndefBug.ll
+++ b/test/Transforms/SCCP/2006-12-19-UndefBug.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -sccp -S | \
-; RUN:   grep {ret i1 false}
+; RUN:   grep "ret i1 false"
 
 define i1 @foo() {
 	%X = and i1 false, undef		; <i1> [#uses=1]
diff --git a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
index a40455c..e7168dd 100644
--- a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
+++ b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sccp -S | grep {ret i32 %Z}
+; RUN: opt < %s -sccp -S | grep "ret i32 %Z"
 ; rdar://5778210
 
 declare {i32, i32} @bar(i32 %A) 
diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
index 63f41db..4688643 100644
--- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
+++ b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sccp -S | not grep {ret i32 undef}
+; RUN: opt < %s -sccp -S | not grep "ret i32 undef"
 ; PR2358
 target datalayout =
 "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index f62ed70..c05f897 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -ipsccp -S | grep {ret i32 42}
-; RUN: opt < %s -ipsccp -S | grep {ret i32 undef}
+; RUN: opt < %s -ipsccp -S | grep "ret i32 42"
+; RUN: opt < %s -ipsccp -S | grep "ret i32 undef"
 ; PR3325
 
 define i32 @main() {
diff --git a/test/Transforms/SCCP/apint-array.ll b/test/Transforms/SCCP/apint-array.ll
index 1e75878..888b9e1 100644
--- a/test/Transforms/SCCP/apint-array.ll
+++ b/test/Transforms/SCCP/apint-array.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sccp -S | grep {ret i101 12}
+; RUN: opt < %s -sccp -S | grep "ret i101 12"
 
 @Y = constant [6 x i101] [ i101 12, i101 123456789000000, i101 -12,i101 
 -123456789000000, i101 0,i101 9123456789000000]
diff --git a/test/Transforms/SCCP/apint-basictest4.ll b/test/Transforms/SCCP/apint-basictest4.ll
index 8624260..572f97c 100644
--- a/test/Transforms/SCCP/apint-basictest4.ll
+++ b/test/Transforms/SCCP/apint-basictest4.ll
@@ -4,7 +4,7 @@
 
 ; RUN: opt < %s -sccp -S | not grep and
 ; RUN: opt < %s -sccp -S | not grep trunc
-; RUN: opt < %s -sccp -S | grep {ret i100 -1}
+; RUN: opt < %s -sccp -S | grep "ret i100 -1"
 
 define i100 @test(i133 %A) {
         %B = and i133 0, %A
diff --git a/test/Transforms/SCCP/apint-ipsccp1.ll b/test/Transforms/SCCP/apint-ipsccp1.ll
index fda40f5..f6f18fe 100644
--- a/test/Transforms/SCCP/apint-ipsccp1.ll
+++ b/test/Transforms/SCCP/apint-ipsccp1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -ipsccp -S | grep -v {ret i512 undef} | \
-; RUN:   grep {ret i8 2}
+; RUN: opt < %s -ipsccp -S | grep -v "ret i512 undef" | \
+; RUN:   grep "ret i8 2"
 
 define internal i512 @test(i1 %B) {
 	br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/SCCP/apint-ipsccp2.ll b/test/Transforms/SCCP/apint-ipsccp2.ll
index 3c02e05..834cca4 100644
--- a/test/Transforms/SCCP/apint-ipsccp2.ll
+++ b/test/Transforms/SCCP/apint-ipsccp2.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -ipsccp -S | grep -v {ret i101 0} | \
-; RUN:    grep -v {ret i101 undef} | not grep ret
+; RUN: opt < %s -ipsccp -S | grep -v "ret i101 0" | \
+; RUN:    grep -v "ret i101 undef" | not grep ret
 
 
 define internal i101 @bar(i101 %A) {
diff --git a/test/Transforms/SCCP/logical-nuke.ll b/test/Transforms/SCCP/logical-nuke.ll
index b3d845c..45f6f44 100644
--- a/test/Transforms/SCCP/logical-nuke.ll
+++ b/test/Transforms/SCCP/logical-nuke.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sccp -S | grep {ret i32 0}
+; RUN: opt < %s -sccp -S | grep "ret i32 0"
 
 ; Test that SCCP has basic knowledge of when and/or nuke overdefined values.
 
diff --git a/test/Transforms/SCCP/vector-bitcast.ll b/test/Transforms/SCCP/vector-bitcast.ll
new file mode 100644
index 0000000..b032085
--- /dev/null
+++ b/test/Transforms/SCCP/vector-bitcast.ll
@@ -0,0 +1,20 @@
+; RUN: opt -sccp -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+; CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %p
+; rdar://11324230
+
+define void @foo(<2 x i64>* %p) nounwind {
+entry:
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %while.body.i, %entry
+  %vWorkExponent.i.033 = phi <4 x i32> [ %sub.i.i, %while.body.i ], [ <i32 939524096, i32 939524096, i32 939524096, i32 939524096>, %entry ]
+  %sub.i.i = add <4 x i32> %vWorkExponent.i.033, <i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608>
+  %0 = bitcast <4 x i32> %sub.i.i to <2 x i64>
+  %and.i119.i = and <2 x i64> %0, zeroinitializer
+  store volatile <2 x i64> %and.i119.i, <2 x i64>* %p
+  br label %while.body.i
+}
+
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index eb1c945..0b5e415 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,7 +1,7 @@
 ; Scalar replacement was incorrectly promoting this alloca!!
 ;
 ; RUN: opt < %s -scalarrepl -S | \
-; RUN:   sed {s/;.*//g} | grep {\\\[}
+; RUN:   sed "s/;.*//g" | grep "\["
 
 define i8* @test() {
 	%A = alloca [30 x i8]		; <[30 x i8]*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
index 00e43a7..77c7b54 100644
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | grep {alloca %%T}
+; RUN: opt < %s -scalarrepl -S | grep "alloca %%T"
 
 %T = type { [80 x i8], i32, i32 }
 declare i32 @.callback_1(i8*)
diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
index 8bc4ff0..a53f3de 100644
--- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i8 17}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17"
 ; rdar://5707076
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.1.0"
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index 71ba601..f597613 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | grep {call.*mem} 
+; RUN: opt < %s -scalarrepl -S | grep "call.*mem" 
 ; PR2369
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index 7cccb19..b2a9d43 100644
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | grep {s = alloca .struct.x}
+; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x"
 ; PR2423
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
index 9c0f203..3c8a364 100644
--- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
+++ b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i32 %x}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 
diff --git a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
index f8ab875..67228a7 100644
--- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
+++ b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep {ret i32 42}
+; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42"
 ; PR3489
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 3218d59..a4182d4 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -1,6 +1,6 @@
 ; The store into %p should end up with a known alignment of 1, since the memcpy
 ; is only known to access it with 1-byte alignment.
-; RUN: opt < %s -scalarrepl -S | grep {store i16 1, .*, align 1}
+; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1"
 ; PR3720
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
index 98fa1c6..4596885 100644
--- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -10,8 +10,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; CHECK: main
 ; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = and i128
-; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
+; CHECK: extractelement <2 x float> zeroinitializer, i32 0
 
 define void @main() uwtable ssp {
 entry:
@@ -28,8 +27,7 @@ entry:
 
 ; CHECK: test1
 ; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = and i128
-; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
+; CHECK: extractelement <2 x float> zeroinitializer, i32 0
 
 define void @test1() uwtable ssp {
 entry:
@@ -43,9 +41,8 @@ entry:
 
 ; CHECK: test2
 ; CHECK-NOT: alloca
-; CHECK: and i128
-; CHECK: or i128
-; CHECK: trunc i128
+; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0
+; CHECK: fadd float %[[A]], 1.000000e+00
 ; CHECK-NOT: insertelement
 ; CHECK-NOT: extractelement
 
@@ -62,3 +59,17 @@ entry:
   %r = fadd float %r1, %r2
   ret float %r
 }
+
+; CHECK: test3
+; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> <float 2.000000e+00, float 3.000000e+00>, i32 1
+; CHECK: ret float %[[A]]
+
+define float @test3() {
+entry:
+  %ai = alloca { <2 x float>, <2 x float> }, align 8
+  store { <2 x float>, <2 x float> } {<2 x float> <float 0.0, float 1.0>, <2 x float> <float 2.0, float 3.0>}, { <2 x float>, <2 x float> }* %ai, align 8
+  %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]*
+  %arrayidx = getelementptr inbounds [4 x float]* %tmpcast, i64 0, i64 3
+  %f = load float* %arrayidx, align 4
+  ret float %f
+}
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index cd4dc32..58c5a3a 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -260,5 +260,27 @@ entry:
   ret void
 }
 
+; rdar://11861001 - The dynamic GEP here was incorrectly making all accesses
+; to the alloca think they were also dynamic.  Inserts and extracts created to
+; access the vector were all being based from the dynamic access, even in BBs
+; not dominated by the GEP.
+define fastcc void @test() optsize inlinehint ssp align 2 {
+entry:
+  %alloc.0.0 = alloca <4 x float>, align 16
+  %bitcast = bitcast <4 x float>* %alloc.0.0 to [4 x float]*
+  %idx3 = getelementptr inbounds [4 x float]* %bitcast, i32 0, i32 3
+  store float 0.000000e+00, float* %idx3, align 4
+  br label %for.body10
+
+for.body10:                                       ; preds = %for.body10, %entry
+  %loopidx = phi i32 [ 0, %entry ], [ undef, %for.body10 ]
+  %unusedidx = getelementptr inbounds <4 x float>* %alloc.0.0, i32 0, i32 %loopidx
+  br i1 undef, label %for.end, label %for.body10
+
+for.end:                                          ; preds = %for.body10
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00>, <4 x float>* %alloc.0.0, align 16
+  ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
new file mode 100644
index 0000000..565cd76
--- /dev/null
+++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
@@ -0,0 +1,167 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: @test1
+; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
+; CHECK: memset
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
+
+; Split the array but don't replace the memset with an insert
+; element as its not a constant offset.
+; The load, however, can be replaced with an extract element.
+define float @test1(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+  %cast = bitcast float* %ptr1 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
+  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: @test2
+; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
+
+; Do SROA on the array when it has dynamic vector reads and writes.
+define float @test2(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: test3
+; CHECK: %0 = alloca [4 x <4 x float>]
+; CHECK-NOT: alloca
+
+; Don't do SROA on a dynamically indexed vector when it spans
+; more than one array element of the alloca array it is within.
+define float @test3(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
+  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: test4
+; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <16 x float> %0, i32 %idx2
+
+; Don't do SROA on a dynamically indexed vector when it spans
+; more than one array element of the alloca array it is within.
+; However, unlike test3, the store is on the vector type
+; so SROA will convert the large alloca into the large vector
+; type and do all accesses with insert/extract element
+define float @test4(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
+  store <16 x float> zeroinitializer, <16 x float>* %bigvec
+  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: @test5
+; CHECK: %0 = alloca [4 x <4 x float>]
+; CHECK-NOT: alloca
+
+; Don't do SROA as the is a second dynamically indexed array
+; which may span multiple elements of the alloca.
+define float @test5(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
+  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
+  %ret = load float* %ptr4
+  ret float %ret
+}
+
+; CHECK: test6
+; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
+
+%vector.pair = type { %vector.anon, %vector.anon }
+%vector.anon = type { %vector }
+%vector = type { <4 x float> }
+
+; Dynamic GEPs on vectors were crashing when the vector was inside a struct
+; as the new GEP for the new alloca might not include all the indices from
+; the original GEP, just the indices it needs to get to the correct offset of
+; some type, not necessarily the dynamic vector.
+; This test makes sure we don't have this crash.
+define float @test6(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca %vector.pair
+  store %vector.pair zeroinitializer, %vector.pair* %0
+  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: test7
+; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
+
+%array.pair = type { [2 x %array.anon], %array.anon }
+%array.anon = type { [2 x %vector] }
+
+; This is the same as test6 and tests the same crash, but on arrays.
+define float @test7(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca %array.pair
+  store %array.pair zeroinitializer, %array.pair* %0
+  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: test8
+; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
+; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
+; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
+; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
+
+; Do SROA on the vector when it has dynamic vector reads and writes
+; from a non-zero offset.
+define float @test8(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca <4 x float>
+  store <4 x float> zeroinitializer, <4 x float>* %0
+  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
+  %ptr2 = bitcast float* %ptr1 to <3 x float>*
+  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
+  store float 1.0, float* %ptr3
+  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
+  %ptr5 = bitcast float* %ptr4 to <2 x float>*
+  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
+  %ret = load float* %ptr6
+  ret float %ret
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index 59475ad..5557a8f 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -45,8 +45,10 @@ declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 %T = type { i8, [123 x i8] }
+%U = type { i32, i32, i32, i32, i32 }
 
 @G = constant %T {i8 1, [123 x i8] zeroinitializer }
+@H = constant [2 x %U] zeroinitializer, align 16
 
 define void @test2() {
   %A = alloca %T
@@ -108,3 +110,37 @@ define void @test5() {
 
 
 declare void @baz(i8* byval)
+
+
+define void @test6() {
+  %A = alloca %U, align 16
+  %a = bitcast %U* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
+  call void @bar(i8* %a) readonly
+; CHECK: @test6
+; CHECK-NEXT: %a = bitcast
+; CHECK-NEXT: call void @bar(i8* %a)
+  ret void
+}
+
+define void @test7() {
+  %A = alloca %U, align 16
+  %a = bitcast %U* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
+  call void @bar(i8* %a) readonly
+; CHECK: @test7
+; CHECK-NEXT: %a = bitcast
+; CHECK-NEXT: call void @bar(i8* %a)
+  ret void
+}
+
+define void @test8() {
+  %A = alloca %U, align 16
+  %a = bitcast %U* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+  call void @bar(i8* %a) readonly
+; CHECK: @test8
+; CHECK: llvm.memcpy
+; CHECK: bar
+  ret void
+}
diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index 0d61e5a..3510dfc 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -1,6 +1,6 @@
 ; PR1226
 ; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep {call void @llvm.memcpy.i32}
+; RUN:   not grep "call void @llvm.memcpy.i32"
 ; RUN: opt < %s -scalarrepl -S | grep getelementptr
 ; END.
 
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index 42e7a0f..95ecf17 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -1,7 +1,7 @@
 ; PR1226
-; RUN: opt < %s -scalarrepl -S | grep {ret i32 16843009}
+; RUN: opt < %s -scalarrepl -S | grep "ret i32 16843009"
 ; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i16 514}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i16 514"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll
index f873456..67fefb4 100644
--- a/test/Transforms/ScalarRepl/not-a-vector.ll
+++ b/test/Transforms/ScalarRepl/not-a-vector.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -S | not grep {7 x double}
-; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret double %B}
+; RUN: opt < %s -scalarrepl -S | not grep "7 x double"
+; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret double %B"
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 define double @test(double %A, double %B) {
diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll
index 8b7e50d..6a49918 100644
--- a/test/Transforms/ScalarRepl/union-fp-int.ll
+++ b/test/Transforms/ScalarRepl/union-fp-int.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -scalarrepl -S | \
 ; RUN:   not grep alloca
 ; RUN: opt < %s -scalarrepl -S | \
-; RUN:   grep {bitcast.*float.*i32}
+; RUN:   grep "bitcast.*float.*i32"
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 define i32 @test(float %X) {
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
index ea4ec14..03d25ac 100644
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@@ -1,7 +1,7 @@
 ; PR892
 ; RUN: opt < %s -scalarrepl -S | \
 ; RUN:   not grep alloca
-; RUN: opt < %s -scalarrepl -S | grep {ret i8}
+; RUN: opt < %s -scalarrepl -S | grep "ret i8"
 
 target datalayout = "e-p:32:32-n8:16:32"
 target triple = "i686-apple-darwin8.7.2"
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index decbd30..33e8034 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -scalarrepl -S > %t
-; RUN: grep {ret <16 x float> %A} %t
-; RUN: grep {ret <16 x float> zeroinitializer} %t
+; RUN: grep "ret <16 x float> %A" %t
+; RUN: grep "ret <16 x float> zeroinitializer" %t
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 define <16 x float> @foo(<16 x float> %A) nounwind {
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index fadf1aa..056526c 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -scalarrepl -S | grep {load volatile}
-; RUN: opt < %s -scalarrepl -S | grep {store volatile}
+; RUN: opt < %s -scalarrepl -S | grep "load volatile"
+; RUN: opt < %s -scalarrepl -S | grep "store volatile"
 
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
index 414235b..feffb4e 100644
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
@@ -1,7 +1,7 @@
 ; Basic block #2 should not be merged into BB #3!
 ;
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   grep {br label}
+; RUN:   grep "br label"
 ;
 
 declare void @foo()
diff --git a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll b/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
deleted file mode 100644
index bc61a75..0000000
--- a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Do not remove the invoke!
-;
-; RUN: opt < %s -simplifycfg -S | grep invoke
-
-define i32 @test() {
-	invoke i32 @test( )
-			to label %Ret unwind label %Ret		; <i32>:1 [#uses=0]
-Ret:		; preds = %0, %0
-        %val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                 catch i8* null
-	%A = add i32 0, 1		; <i32> [#uses=1]
-	ret i32 %A
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
index 8ac9ae4..fc89b16 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
@@ -2,7 +2,7 @@
 ; 'br Dest'
 
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep {br i1 %c2}
+; RUN:   not grep "br i1 %c2"
 
 declare void @noop()
 
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
index 888e187..c1b032f 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
@@ -4,7 +4,7 @@
 ; the ConstantFoldTerminator function.
 
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep {br i1 %c2}
+; RUN:   not grep "br i1 %c2"
 
 declare void @noop()
 
diff --git a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
deleted file mode 100644
index 27d9d8f..0000000
--- a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
+++ /dev/null
@@ -1,567 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; END.
-	%struct..4._102 = type { %struct.QVectorData* }
-	%struct..5._125 = type { %struct.QMapData* }
-	%struct.QAbstractTextDocumentLayout = type { %struct.QObject }
-	%struct.QBasicAtomic = type { i32 }
-	%struct.QFont = type { %struct.QFontPrivate*, i32 }
-	%struct.QFontMetrics = type { %struct.QFontPrivate* }
-	%struct.QFontPrivate = type opaque
-	%"struct.QFragmentMap<QTextBlockData>" = type { %struct.QFragmentMapData }
-	%struct.QFragmentMapData = type { %"struct.QFragmentMapData::._154", i32 }
-	%"struct.QFragmentMapData::._154" = type { %"struct.QFragmentMapData::Header"* }
-	%"struct.QFragmentMapData::Header" = type { i32, i32, i32, i32, i32, i32, i32, i32 }
-	%"struct.QHash<uint,QHashDummyValue>" = type { %"struct.QHash<uint,QHashDummyValue>::._152" }
-	%"struct.QHash<uint,QHashDummyValue>::._152" = type { %struct.QHashData* }
-	%struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.QBasicAtomic, i32, i32, i16, i16, i32, i8 }
-	%"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 }
-	%"struct.QList<QObject*>::._92" = type { %struct.QListData }
-	%"struct.QList<QPointer<QObject> >" = type { %"struct.QList<QObject*>::._92" }
-	%struct.QListData = type { %"struct.QListData::Data"* }
-	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
-	%"struct.QMap<QUrl,QVariant>" = type { %struct..5._125 }
-	%struct.QMapData = type { %"struct.QMapData::Node"*, [12 x %"struct.QMapData::Node"*], %struct.QBasicAtomic, i32, i32, i32, i8 }
-	%"struct.QMapData::Node" = type { %"struct.QMapData::Node"*, [1 x %"struct.QMapData::Node"*] }
-	%struct.QObject = type { i32 (...)**, %struct.QObjectData* }
-	%struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QPointer<QObject> >", i8, [3 x i8], i32, i32 }
-	%struct.QObjectPrivate = type { %struct.QObjectData, i32, %struct.QObject*, %"struct.QList<QPointer<QObject> >", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %struct.QString }
-	%struct.QPaintDevice = type { i32 (...)**, i16 }
-	%struct.QPainter = type { %struct.QPainterPrivate* }
-	%struct.QPainterPrivate = type opaque
-	%struct.QPointF = type { double, double }
-	%struct.QPrinter = type { %struct.QPaintDevice, %struct.QPrinterPrivate* }
-	%struct.QPrinterPrivate = type opaque
-	%struct.QRectF = type { double, double, double, double }
-	%"struct.QSet<uint>" = type { %"struct.QHash<uint,QHashDummyValue>" }
-	%"struct.QSharedDataPointer<QTextFormatPrivate>" = type { %struct.QTextFormatPrivate* }
-	%struct.QString = type { %"struct.QString::Data"* }
-	%"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
-	%struct.QTextBlockFormat = type { %struct.QTextFormat }
-	%struct.QTextBlockGroup = type { %struct.QAbstractTextDocumentLayout }
-	%struct.QTextDocumentConfig = type { %struct.QString }
-	%struct.QTextDocumentPrivate = type { %struct.QObjectPrivate, %struct.QString, %"struct.QVector<QAbstractTextDocumentLayout::Selection>", i1, i32, i32, i1, i32, i32, i32, i32, i1, %struct.QTextFormatCollection, %struct.QTextBlockGroup*, %struct.QAbstractTextDocumentLayout*, %"struct.QFragmentMap<QTextBlockData>", %"struct.QFragmentMap<QTextBlockData>", i32, %"struct.QList<QPointer<QObject> >", %"struct.QList<QPointer<QObject> >", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %struct.QTextDocumentConfig, i1, i1, %struct.QPointF }
-	%struct.QTextFormat = type { %"struct.QSharedDataPointer<QTextFormatPrivate>", i32 }
-	%struct.QTextFormatCollection = type { %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QSet<uint>", %struct.QFont }
-	%struct.QTextFormatPrivate = type opaque
-	%"struct.QVector<QAbstractTextDocumentLayout::Selection>" = type { %struct..4._102 }
-	%struct.QVectorData = type { %struct.QBasicAtomic, i32, i32, i8 }
-
-define void @_ZNK13QTextDocument5printEP8QPrinter(%struct.QAbstractTextDocumentLayout* %this, %struct.QPrinter* %printer) {
-entry:
-	%tmp = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=2]
-	%tmp.upgrd.1 = alloca %struct.QRectF, align 16		; <%struct.QRectF*> [#uses=5]
-	%tmp2 = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=3]
-	%tmp.upgrd.2 = alloca %struct.QFontMetrics, align 16		; <%struct.QFontMetrics*> [#uses=4]
-	%tmp.upgrd.3 = alloca %struct.QFont, align 16		; <%struct.QFont*> [#uses=4]
-	%tmp3 = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=2]
-	%p = alloca %struct.QPainter, align 16		; <%struct.QPainter*> [#uses=14]
-	%body = alloca %struct.QRectF, align 16		; <%struct.QRectF*> [#uses=9]
-	%pageNumberPos = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=4]
-	%scaledPageSize = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=6]
-	%printerPageSize = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=3]
-	%fmt = alloca %struct.QTextBlockFormat, align 16		; <%struct.QTextBlockFormat*> [#uses=5]
-	%font = alloca %struct.QFont, align 16		; <%struct.QFont*> [#uses=5]
-	%tmp.upgrd.4 = call %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv( %struct.QAbstractTextDocumentLayout* %this )		; <%struct.QTextDocumentPrivate*> [#uses=5]
-	%tmp.upgrd.5 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	call void @_ZN8QPainterC1EP12QPaintDevice( %struct.QPainter* %p, %struct.QPaintDevice* %tmp.upgrd.5 )
-	%tmp.upgrd.6 = invoke i1 @_ZNK8QPainter8isActiveEv( %struct.QPainter* %p )
-			to label %invcont unwind label %cleanup329		; <i1> [#uses=1]
-invcont:		; preds = %entry
-	br i1 %tmp.upgrd.6, label %cond_next, label %cleanup328
-cond_next:		; preds = %invcont
-	%tmp8 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
-			to label %invcont7 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=0]
-invcont7:		; preds = %cond_next
-	%tmp10 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp, double 0.000000e+00, double 0.000000e+00 )
-	call void @_ZN6QRectFC1ERK7QPointFRK6QSizeF( %struct.QRectF* %body, %struct.QPointF* %tmp, %struct.QPointF* %tmp10 )
-	call void @_ZN7QPointFC1Ev( %struct.QPointF* %pageNumberPos )
-	%tmp12 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	%tmp13 = call i1 @_ZNK6QSizeF7isValidEv( %struct.QPointF* %tmp12 )		; <i1> [#uses=1]
-	br i1 %tmp13, label %cond_next15, label %bb
-cond_next15:		; preds = %invcont7
-	%tmp17 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	%tmp.upgrd.7 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %tmp17 )		; <double> [#uses=1]
-	%tmp18 = fcmp oeq double %tmp.upgrd.7, 0x41DFFFFFFFC00000		; <i1> [#uses=1]
-	br i1 %tmp18, label %bb, label %cond_next20
-cond_next20:		; preds = %cond_next15
-	br label %bb21
-bb:		; preds = %cond_next15, %invcont7
-	br label %bb21
-bb21:		; preds = %bb, %cond_next20
-	%iftmp.406.0 = phi i1 [ false, %bb ], [ true, %cond_next20 ]		; <i1> [#uses=1]
-	br i1 %iftmp.406.0, label %cond_true24, label %cond_false
-cond_true24:		; preds = %bb21
-	%tmp.upgrd.8 = invoke i32 @_Z13qt_defaultDpiv( )
-			to label %invcont25 unwind label %cleanup329		; <i32> [#uses=1]
-invcont25:		; preds = %cond_true24
-	%tmp26 = sitofp i32 %tmp.upgrd.8 to double		; <double> [#uses=2]
-	%tmp30 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
-			to label %invcont29 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
-invcont29:		; preds = %invcont25
-	%tmp32 = invoke %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv( %struct.QAbstractTextDocumentLayout* %tmp30 )
-			to label %invcont31 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=3]
-invcont31:		; preds = %invcont29
-	%tmp34 = icmp eq %struct.QPaintDevice* %tmp32, null		; <i1> [#uses=1]
-	br i1 %tmp34, label %cond_next42, label %cond_true35
-cond_true35:		; preds = %invcont31
-	%tmp38 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp32 )
-			to label %invcont37 unwind label %cleanup329		; <i32> [#uses=1]
-invcont37:		; preds = %cond_true35
-	%tmp38.upgrd.9 = sitofp i32 %tmp38 to double		; <double> [#uses=1]
-	%tmp41 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp32 )
-			to label %invcont40 unwind label %cleanup329		; <i32> [#uses=1]
-invcont40:		; preds = %invcont37
-	%tmp41.upgrd.10 = sitofp i32 %tmp41 to double		; <double> [#uses=1]
-	br label %cond_next42
-cond_next42:		; preds = %invcont40, %invcont31
-	%sourceDpiY.2 = phi double [ %tmp41.upgrd.10, %invcont40 ], [ %tmp26, %invcont31 ]		; <double> [#uses=1]
-	%sourceDpiX.2 = phi double [ %tmp38.upgrd.9, %invcont40 ], [ %tmp26, %invcont31 ]		; <double> [#uses=1]
-	%tmp44 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp46 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp44 )
-			to label %invcont45 unwind label %cleanup329		; <i32> [#uses=1]
-invcont45:		; preds = %cond_next42
-	%tmp46.upgrd.11 = sitofp i32 %tmp46 to double		; <double> [#uses=1]
-	%tmp48 = fdiv double %tmp46.upgrd.11, %sourceDpiX.2		; <double> [#uses=2]
-	%tmp50 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp52 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp50 )
-			to label %invcont51 unwind label %cleanup329		; <i32> [#uses=1]
-invcont51:		; preds = %invcont45
-	%tmp52.upgrd.12 = sitofp i32 %tmp52 to double		; <double> [#uses=1]
-	%tmp54 = fdiv double %tmp52.upgrd.12, %sourceDpiY.2		; <double> [#uses=2]
-	invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp48, double %tmp54 )
-			to label %invcont57 unwind label %cleanup329
-invcont57:		; preds = %invcont51
-	%tmp.upgrd.13 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp60 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 0		; <double*> [#uses=1]
-	%tmp61 = load double* %tmp60		; <double> [#uses=1]
-	store double %tmp61, double* %tmp.upgrd.13
-	%tmp62 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp63 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 1		; <double*> [#uses=1]
-	%tmp64 = load double* %tmp63		; <double> [#uses=1]
-	store double %tmp64, double* %tmp62
-	%tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
-	%tmp67 = load double* %tmp65		; <double> [#uses=1]
-	%tmp69 = fmul double %tmp67, %tmp48		; <double> [#uses=1]
-	store double %tmp69, double* %tmp65
-	%tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
-	%tmp73 = load double* %tmp71		; <double> [#uses=1]
-	%tmp75 = fmul double %tmp73, %tmp54		; <double> [#uses=1]
-	store double %tmp75, double* %tmp71
-	%tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 )
-			to label %invcont79 unwind label %cleanup329		; <i32> [#uses=1]
-invcont79:		; preds = %invcont57
-	%tmp82 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp84 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp82 )
-			to label %invcont83 unwind label %cleanup329		; <i32> [#uses=1]
-invcont83:		; preds = %invcont79
-	%tmp80.upgrd.14 = sitofp i32 %tmp80 to double		; <double> [#uses=1]
-	%tmp84.upgrd.15 = sitofp i32 %tmp84 to double		; <double> [#uses=1]
-	call void @_ZN6QSizeFC1Edd( %struct.QPointF* %printerPageSize, double %tmp84.upgrd.15, double %tmp80.upgrd.14 )
-	%tmp85 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %printerPageSize )		; <double> [#uses=1]
-	%tmp86 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %scaledPageSize )		; <double> [#uses=1]
-	%tmp87 = fdiv double %tmp85, %tmp86		; <double> [#uses=1]
-	%tmp88 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %printerPageSize )		; <double> [#uses=1]
-	%tmp89 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %scaledPageSize )		; <double> [#uses=1]
-	%tmp90 = fdiv double %tmp88, %tmp89		; <double> [#uses=1]
-	invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp90, double %tmp87 )
-			to label %cond_next194 unwind label %cleanup329
-cond_false:		; preds = %bb21
-	%tmp.upgrd.16 = getelementptr %struct.QAbstractTextDocumentLayout* %this, i32 0, i32 0		; <%struct.QObject*> [#uses=1]
-	%tmp95 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject( %struct.QAbstractTextDocumentLayout* %this, %struct.QObject* %tmp.upgrd.16 )
-			to label %invcont94 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=9]
-invcont94:		; preds = %cond_false
-	%tmp99 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont98 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
-invcont98:		; preds = %invcont94
-	%tmp101 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont100 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=1]
-invcont100:		; preds = %invcont98
-	invoke void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice( %struct.QAbstractTextDocumentLayout* %tmp99, %struct.QPaintDevice* %tmp101 )
-			to label %invcont103 unwind label %cleanup329
-invcont103:		; preds = %invcont100
-	%tmp105 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont104 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=1]
-invcont104:		; preds = %invcont103
-	%tmp107 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp105 )
-			to label %invcont106 unwind label %cleanup329		; <i32> [#uses=1]
-invcont106:		; preds = %invcont104
-	%tmp108 = sitofp i32 %tmp107 to double		; <double> [#uses=1]
-	%tmp109 = fmul double %tmp108, 0x3FE93264C993264C		; <double> [#uses=1]
-	%tmp109.upgrd.17 = fptosi double %tmp109 to i32		; <i32> [#uses=3]
-	%tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 )		; <%struct.QTextBlockGroup*> [#uses=1]
-	invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret  %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 )
-			to label %invcont111 unwind label %cleanup329
-invcont111:		; preds = %invcont106
-	%tmp112 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	invoke void @_ZN16QTextFrameFormat9setMarginEd( %struct.QTextBlockFormat* %fmt, double %tmp112 )
-			to label %invcont114 unwind label %cleanup192
-invcont114:		; preds = %invcont111
-	%tmp116 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 )		; <%struct.QTextBlockGroup*> [#uses=1]
-	invoke void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat( %struct.QTextBlockGroup* %tmp116, %struct.QTextBlockFormat* %fmt )
-			to label %invcont117 unwind label %cleanup192
-invcont117:		; preds = %invcont114
-	%tmp119 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont118 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont118:		; preds = %invcont117
-	%tmp121 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp119 )
-			to label %invcont120 unwind label %cleanup192		; <i32> [#uses=1]
-invcont120:		; preds = %invcont118
-	%tmp121.upgrd.19 = sitofp i32 %tmp121 to double		; <double> [#uses=1]
-	%tmp123 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont122 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont122:		; preds = %invcont120
-	%tmp125 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp123 )
-			to label %invcont124 unwind label %cleanup192		; <i32> [#uses=1]
-invcont124:		; preds = %invcont122
-	%tmp125.upgrd.20 = sitofp i32 %tmp125 to double		; <double> [#uses=1]
-	call void @_ZN6QRectFC1Edddd( %struct.QRectF* %tmp.upgrd.1, double 0.000000e+00, double 0.000000e+00, double %tmp125.upgrd.20, double %tmp121.upgrd.19 )
-	%tmp126 = getelementptr %struct.QRectF* %body, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp127 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp128 = load double* %tmp127		; <double> [#uses=1]
-	store double %tmp128, double* %tmp126
-	%tmp129 = getelementptr %struct.QRectF* %body, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp130 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp131 = load double* %tmp130		; <double> [#uses=1]
-	store double %tmp131, double* %tmp129
-	%tmp132 = getelementptr %struct.QRectF* %body, i32 0, i32 2		; <double*> [#uses=1]
-	%tmp133 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 2		; <double*> [#uses=1]
-	%tmp134 = load double* %tmp133		; <double> [#uses=1]
-	store double %tmp134, double* %tmp132
-	%tmp135 = getelementptr %struct.QRectF* %body, i32 0, i32 3		; <double*> [#uses=1]
-	%tmp136 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 3		; <double*> [#uses=1]
-	%tmp137 = load double* %tmp136		; <double> [#uses=1]
-	store double %tmp137, double* %tmp135
-	%tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body )		; <double> [#uses=1]
-	%tmp139 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp140 = fsub double %tmp138, %tmp139		; <double> [#uses=1]
-	%tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont141 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont141:		; preds = %invcont124
-	invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret  %tmp.upgrd.3, %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont144 unwind label %cleanup192
-invcont144:		; preds = %invcont141
-	invoke void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice( %struct.QFontMetrics* %tmp.upgrd.2, %struct.QFont* %tmp.upgrd.3, %struct.QPaintDevice* %tmp142 )
-			to label %invcont146 unwind label %cleanup173
-invcont146:		; preds = %invcont144
-	%tmp149 = invoke i32 @_ZNK12QFontMetrics6ascentEv( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %invcont148 unwind label %cleanup168		; <i32> [#uses=1]
-invcont148:		; preds = %invcont146
-	%tmp149.upgrd.21 = sitofp i32 %tmp149 to double		; <double> [#uses=1]
-	%tmp150 = fadd double %tmp140, %tmp149.upgrd.21		; <double> [#uses=1]
-	%tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont151 unwind label %cleanup168		; <%struct.QPaintDevice*> [#uses=1]
-invcont151:		; preds = %invcont148
-	%tmp154 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp152 )
-			to label %invcont153 unwind label %cleanup168		; <i32> [#uses=1]
-invcont153:		; preds = %invcont151
-	%tmp155 = mul i32 %tmp154, 5		; <i32> [#uses=1]
-	%tmp156 = sdiv i32 %tmp155, 72		; <i32> [#uses=1]
-	%tmp156.upgrd.22 = sitofp i32 %tmp156 to double		; <double> [#uses=1]
-	%tmp157 = fadd double %tmp150, %tmp156.upgrd.22		; <double> [#uses=1]
-	%tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body )		; <double> [#uses=1]
-	%tmp159 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp160 = fsub double %tmp158, %tmp159		; <double> [#uses=1]
-	call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 )
-	%tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp163 = load double* %tmp162		; <double> [#uses=1]
-	store double %tmp163, double* %tmp161
-	%tmp164 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp165 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp166 = load double* %tmp165		; <double> [#uses=1]
-	store double %tmp166, double* %tmp164
-	invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %cleanup171 unwind label %cleanup173
-cleanup168:		; preds = %invcont151, %invcont148, %invcont146
-        %val168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %cleanup173 unwind label %cleanup173
-cleanup171:		; preds = %invcont153
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
-			to label %finally170 unwind label %cleanup192
-cleanup173:		; preds = %cleanup168, %cleanup168, %invcont153, %invcont144
-        %val173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
-			to label %cleanup192 unwind label %cleanup192
-finally170:		; preds = %cleanup171
-	invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret  %font, %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont177 unwind label %cleanup192
-invcont177:		; preds = %finally170
-	invoke void @_ZN5QFont12setPointSizeEi( %struct.QFont* %font, i32 10 )
-			to label %invcont179 unwind label %cleanup187
-invcont179:		; preds = %invcont177
-	invoke void @_ZN13QTextDocument14setDefaultFontERK5QFont( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QFont* %font )
-			to label %invcont181 unwind label %cleanup187
-invcont181:		; preds = %invcont179
-	call void @_ZNK6QRectF4sizeEv( %struct.QPointF* sret  %tmp3, %struct.QRectF* %body )
-	invoke void @_ZN13QTextDocument11setPageSizeERK6QSizeF( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QPointF* %tmp3 )
-			to label %cleanup185 unwind label %cleanup187
-cleanup185:		; preds = %invcont181
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
-			to label %cleanup190 unwind label %cleanup192
-cleanup187:		; preds = %invcont181, %invcont179, %invcont177
-        %val187 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
-			to label %cleanup192 unwind label %cleanup192
-cleanup190:		; preds = %cleanup185
-	invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
-			to label %cond_next194 unwind label %cleanup329
-cleanup192:		; preds = %cleanup187, %cleanup187, %cleanup185, %finally170, %cleanup173, %cleanup173, %cleanup171, %invcont141, %invcont124, %invcont122, %invcont120, %invcont118, %invcont117, %invcont114, %invcont111
-        %val192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
-			to label %cleanup329 unwind label %cleanup329
-cond_next194:		; preds = %cleanup190, %invcont83
-	%clonedDoc.1 = phi %struct.QAbstractTextDocumentLayout* [ null, %invcont83 ], [ %tmp95, %cleanup190 ]		; <%struct.QAbstractTextDocumentLayout*> [#uses=3]
-	%doc.1 = phi %struct.QAbstractTextDocumentLayout* [ %this, %invcont83 ], [ %tmp95, %cleanup190 ]		; <%struct.QAbstractTextDocumentLayout*> [#uses=2]
-	%tmp197 = invoke i1 @_ZNK8QPrinter13collateCopiesEv( %struct.QPrinter* %printer )
-			to label %invcont196 unwind label %cleanup329		; <i1> [#uses=1]
-invcont196:		; preds = %cond_next194
-	br i1 %tmp197, label %cond_true200, label %cond_false204
-cond_true200:		; preds = %invcont196
-	%tmp203 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
-			to label %invcont202 unwind label %cleanup329		; <i32> [#uses=1]
-invcont202:		; preds = %cond_true200
-	br label %cond_next208
-cond_false204:		; preds = %invcont196
-	%tmp207 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
-			to label %invcont206 unwind label %cleanup329		; <i32> [#uses=1]
-invcont206:		; preds = %cond_false204
-	br label %cond_next208
-cond_next208:		; preds = %invcont206, %invcont202
-	%pageCopies.0 = phi i32 [ %tmp203, %invcont202 ], [ 1, %invcont206 ]		; <i32> [#uses=2]
-	%docCopies.0 = phi i32 [ 1, %invcont202 ], [ %tmp207, %invcont206 ]		; <i32> [#uses=2]
-	%tmp211 = invoke i32 @_ZNK8QPrinter8fromPageEv( %struct.QPrinter* %printer )
-			to label %invcont210 unwind label %cleanup329		; <i32> [#uses=3]
-invcont210:		; preds = %cond_next208
-	%tmp214 = invoke i32 @_ZNK8QPrinter6toPageEv( %struct.QPrinter* %printer )
-			to label %invcont213 unwind label %cleanup329		; <i32> [#uses=3]
-invcont213:		; preds = %invcont210
-	%tmp216 = icmp eq i32 %tmp211, 0		; <i1> [#uses=1]
-	br i1 %tmp216, label %cond_true217, label %cond_next225
-cond_true217:		; preds = %invcont213
-	%tmp219 = icmp eq i32 %tmp214, 0		; <i1> [#uses=1]
-	br i1 %tmp219, label %cond_true220, label %cond_next225
-cond_true220:		; preds = %cond_true217
-	%tmp223 = invoke i32 @_ZNK13QTextDocument9pageCountEv( %struct.QAbstractTextDocumentLayout* %doc.1 )
-			to label %invcont222 unwind label %cleanup329		; <i32> [#uses=1]
-invcont222:		; preds = %cond_true220
-	br label %cond_next225
-cond_next225:		; preds = %invcont222, %cond_true217, %invcont213
-	%toPage.1 = phi i32 [ %tmp223, %invcont222 ], [ %tmp214, %cond_true217 ], [ %tmp214, %invcont213 ]		; <i32> [#uses=2]
-	%fromPage.1 = phi i32 [ 1, %invcont222 ], [ %tmp211, %cond_true217 ], [ %tmp211, %invcont213 ]		; <i32> [#uses=2]
-	%tmp.page = invoke i32 @_ZNK8QPrinter9pageOrderEv( %struct.QPrinter* %printer )
-			to label %invcont227 unwind label %cleanup329		; <i32> [#uses=1]
-invcont227:		; preds = %cond_next225
-	%tmp228 = icmp eq i32 %tmp.page, 1		; <i1> [#uses=1]
-	br i1 %tmp228, label %cond_true230, label %cond_next234
-cond_true230:		; preds = %invcont227
-	br label %cond_next234
-cond_next234:		; preds = %cond_true230, %invcont227
-	%ascending.1 = phi i1 [ false, %cond_true230 ], [ true, %invcont227 ]		; <i1> [#uses=1]
-	%toPage.2 = phi i32 [ %fromPage.1, %cond_true230 ], [ %toPage.1, %invcont227 ]		; <i32> [#uses=1]
-	%fromPage.2 = phi i32 [ %toPage.1, %cond_true230 ], [ %fromPage.1, %invcont227 ]		; <i32> [#uses=1]
-	br label %bb309
-bb237:		; preds = %cond_true313, %cond_next293
-	%iftmp.410.4 = phi i1 [ %iftmp.410.5, %cond_true313 ], [ %iftmp.410.1, %cond_next293 ]		; <i1> [#uses=1]
-	%page.4 = phi i32 [ %fromPage.2, %cond_true313 ], [ %page.3, %cond_next293 ]		; <i32> [#uses=4]
-	br label %bb273
-invcont240:		; preds = %cond_true277
-	%tmp242 = icmp eq i32 %tmp241, 2		; <i1> [#uses=1]
-	br i1 %tmp242, label %bb252, label %cond_next244
-cond_next244:		; preds = %invcont240
-	%tmp247 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
-			to label %invcont246 unwind label %cleanup329		; <i32> [#uses=1]
-invcont246:		; preds = %cond_next244
-	%tmp248 = icmp eq i32 %tmp247, 3		; <i1> [#uses=1]
-	br i1 %tmp248, label %bb252, label %bb253
-bb252:		; preds = %invcont246, %invcont240
-	br label %bb254
-bb253:		; preds = %invcont246
-	br label %bb254
-bb254:		; preds = %bb253, %bb252
-	%iftmp.410.0 = phi i1 [ true, %bb252 ], [ false, %bb253 ]		; <i1> [#uses=2]
-	br i1 %iftmp.410.0, label %UserCanceled, label %cond_next258
-cond_next258:		; preds = %bb254
-	invoke fastcc void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF( i32 %page.4, %struct.QPainter* %p, %struct.QAbstractTextDocumentLayout* %doc.1, %struct.QRectF* %body, %struct.QPointF* %pageNumberPos )
-			to label %invcont261 unwind label %cleanup329
-invcont261:		; preds = %cond_next258
-	%tmp263 = add i32 %pageCopies.0, -1		; <i32> [#uses=1]
-	%tmp265 = icmp sgt i32 %tmp263, %j.4		; <i1> [#uses=1]
-	br i1 %tmp265, label %cond_true266, label %cond_next270
-cond_true266:		; preds = %invcont261
-	%tmp269 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %cond_next270 unwind label %cleanup329		; <i1> [#uses=0]
-cond_next270:		; preds = %cond_true266, %invcont261
-	%tmp272 = add i32 %j.4, 1		; <i32> [#uses=1]
-	br label %bb273
-bb273:		; preds = %cond_next270, %bb237
-	%iftmp.410.1 = phi i1 [ %iftmp.410.4, %bb237 ], [ %iftmp.410.0, %cond_next270 ]		; <i1> [#uses=2]
-	%j.4 = phi i32 [ 0, %bb237 ], [ %tmp272, %cond_next270 ]		; <i32> [#uses=3]
-	%tmp276 = icmp slt i32 %j.4, %pageCopies.0		; <i1> [#uses=1]
-	br i1 %tmp276, label %cond_true277, label %bb280
-cond_true277:		; preds = %bb273
-	%tmp241 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
-			to label %invcont240 unwind label %cleanup329		; <i32> [#uses=1]
-bb280:		; preds = %bb273
-	%tmp283 = icmp eq i32 %page.4, %toPage.2		; <i1> [#uses=1]
-	br i1 %tmp283, label %bb297, label %cond_next285
-cond_next285:		; preds = %bb280
-	br i1 %ascending.1, label %cond_true287, label %cond_false290
-cond_true287:		; preds = %cond_next285
-	%tmp289 = add i32 %page.4, 1		; <i32> [#uses=1]
-	br label %cond_next293
-cond_false290:		; preds = %cond_next285
-	%tmp292 = add i32 %page.4, -1		; <i32> [#uses=1]
-	br label %cond_next293
-cond_next293:		; preds = %cond_false290, %cond_true287
-	%page.3 = phi i32 [ %tmp289, %cond_true287 ], [ %tmp292, %cond_false290 ]		; <i32> [#uses=1]
-	%tmp296 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %bb237 unwind label %cleanup329		; <i1> [#uses=0]
-bb297:		; preds = %bb280
-	%tmp299 = add i32 %docCopies.0, -1		; <i32> [#uses=1]
-	%tmp301 = icmp sgt i32 %tmp299, %i.1		; <i1> [#uses=1]
-	br i1 %tmp301, label %cond_true302, label %cond_next306
-cond_true302:		; preds = %bb297
-	%tmp305 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %cond_next306 unwind label %cleanup329		; <i1> [#uses=0]
-cond_next306:		; preds = %cond_true302, %bb297
-	%tmp308 = add i32 %i.1, 1		; <i32> [#uses=1]
-	br label %bb309
-bb309:		; preds = %cond_next306, %cond_next234
-	%iftmp.410.5 = phi i1 [ undef, %cond_next234 ], [ %iftmp.410.1, %cond_next306 ]		; <i1> [#uses=1]
-	%i.1 = phi i32 [ 0, %cond_next234 ], [ %tmp308, %cond_next306 ]		; <i32> [#uses=3]
-	%tmp312 = icmp slt i32 %i.1, %docCopies.0		; <i1> [#uses=1]
-	br i1 %tmp312, label %cond_true313, label %UserCanceled
-cond_true313:		; preds = %bb309
-	br label %bb237
-UserCanceled:		; preds = %bb309, %bb254
-	%tmp318 = icmp eq %struct.QAbstractTextDocumentLayout* %clonedDoc.1, null		; <i1> [#uses=1]
-	br i1 %tmp318, label %cleanup327, label %cond_true319
-cond_true319:		; preds = %UserCanceled
-	%tmp.upgrd.23 = getelementptr %struct.QAbstractTextDocumentLayout* %clonedDoc.1, i32 0, i32 0, i32 0		; <i32 (...)***> [#uses=1]
-	%tmp.upgrd.24 = load i32 (...)*** %tmp.upgrd.23		; <i32 (...)**> [#uses=1]
-	%tmp322 = getelementptr i32 (...)** %tmp.upgrd.24, i32 4		; <i32 (...)**> [#uses=1]
-	%tmp.upgrd.25 = load i32 (...)** %tmp322		; <i32 (...)*> [#uses=1]
-	%tmp.upgrd.26 = bitcast i32 (...)* %tmp.upgrd.25 to void (%struct.QAbstractTextDocumentLayout*)*		; <void (%struct.QAbstractTextDocumentLayout*)*> [#uses=1]
-	invoke void %tmp.upgrd.26( %struct.QAbstractTextDocumentLayout* %clonedDoc.1 )
-			to label %cleanup327 unwind label %cleanup329
-cleanup327:		; preds = %cond_true319, %UserCanceled
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	ret void
-cleanup328:		; preds = %invcont
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	ret void
-cleanup329:		; preds = %cond_true319, %cond_true302, %cond_next293, %cond_true277, %cond_true266, %cond_next258, %cond_next244, %cond_next225, %cond_true220, %invcont210, %cond_next208, %cond_false204, %cond_true200, %cond_next194, %cleanup192, %cleanup192, %cleanup190, %invcont106, %invcont104, %invcont103, %invcont100, %invcont98, %invcont94, %cond_false, %invcont83, %invcont79, %invcont57, %invcont51, %invcont45, %cond_next42, %invcont37, %cond_true35, %invcont29, %invcont25, %cond_true24, %cond_next, %entry
-        %val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                 cleanup
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	resume { i8*, i32 } %val
-}
-
-declare void @_ZN6QSizeFC1Edd(%struct.QPointF*, double, double)
-
-declare i1 @_ZNK6QSizeF7isValidEv(%struct.QPointF*)
-
-declare double @_ZNK6QSizeF5widthEv(%struct.QPointF*)
-
-declare double @_ZNK6QSizeF6heightEv(%struct.QPointF*)
-
-declare double* @_ZN6QSizeF6rwidthEv(%struct.QPointF*)
-
-declare double* @_ZN6QSizeF7rheightEv(%struct.QPointF*)
-
-declare %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN7QPointFC1Ev(%struct.QPointF*)
-
-declare void @_ZN7QPointFC1Edd(%struct.QPointF*, double, double)
-
-declare void @_ZN16QTextFrameFormat9setMarginEd(%struct.QTextBlockFormat*, double)
-
-declare void @_ZN6QRectFC1Edddd(%struct.QRectF*, double, double, double, double)
-
-declare void @_ZN6QRectFC1ERK7QPointFRK6QSizeF(%struct.QRectF*, %struct.QPointF*, %struct.QPointF*)
-
-declare double @_ZNK6QRectF5widthEv(%struct.QRectF*)
-
-declare double @_ZNK6QRectF6heightEv(%struct.QRectF*)
-
-declare void @_ZNK6QRectF4sizeEv(%struct.QPointF*, %struct.QRectF*)
-
-declare void @_ZN16QTextFrameFormatD1Ev(%struct.QTextBlockFormat*)
-
-declare void @_ZNK10QTextFrame11frameFormatEv(%struct.QTextBlockFormat*, %struct.QTextBlockGroup*)
-
-declare void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat(%struct.QTextBlockGroup*, %struct.QTextBlockFormat*)
-
-declare i32 @_ZNK12QPaintDevice5widthEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice6heightEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice11logicalDpiXEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice11logicalDpiYEv(%struct.QPaintDevice*)
-
-declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject(%struct.QAbstractTextDocumentLayout*, %struct.QObject*)
-
-declare void @_ZN5QFontD1Ev(%struct.QFont*)
-
-declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv(%struct.QAbstractTextDocumentLayout*)
-
-declare %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv(%struct.QAbstractTextDocumentLayout*)
-
-declare i32 @_ZNK13QTextDocument9pageCountEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZNK13QTextDocument11defaultFontEv(%struct.QFont*, %struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN13QTextDocument14setDefaultFontERK5QFont(%struct.QAbstractTextDocumentLayout*, %struct.QFont*)
-
-declare void @_ZN13QTextDocument11setPageSizeERK6QSizeF(%struct.QAbstractTextDocumentLayout*, %struct.QPointF*)
-
-declare void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF(i32, %struct.QPainter*, %struct.QAbstractTextDocumentLayout*, %struct.QRectF*, %struct.QPointF*)
-
-declare void @_ZN12QFontMetricsD1Ev(%struct.QFontMetrics*)
-
-declare void @_ZN8QPainterC1EP12QPaintDevice(%struct.QPainter*, %struct.QPaintDevice*)
-
-declare i1 @_ZNK8QPainter8isActiveEv(%struct.QPainter*)
-
-declare i32 @_Z13qt_defaultDpiv()
-
-declare %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN8QPainter5scaleEdd(%struct.QPainter*, double, double)
-
-declare %struct.QPaintDevice* @_ZNK8QPainter6deviceEv(%struct.QPainter*)
-
-declare void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice(%struct.QAbstractTextDocumentLayout*, %struct.QPaintDevice*)
-
-declare void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice(%struct.QFontMetrics*, %struct.QFont*, %struct.QPaintDevice*)
-
-declare i32 @_ZNK12QFontMetrics6ascentEv(%struct.QFontMetrics*)
-
-declare void @_ZN5QFont12setPointSizeEi(%struct.QFont*, i32)
-
-declare i1 @_ZNK8QPrinter13collateCopiesEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter9numCopiesEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter8fromPageEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter6toPageEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter9pageOrderEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter12printerStateEv(%struct.QPrinter*)
-
-declare i1 @_ZN8QPrinter7newPageEv(%struct.QPrinter*)
-
-declare void @_ZN8QPainterD1Ev(%struct.QPainter*)
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 00f2d5b..14baeea 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -1,5 +1,5 @@
 ; The phi should not be eliminated in this case, because the fp op could trap.
-; RUN: opt < %s -simplifycfg -S | grep {= phi double}
+; RUN: opt < %s -simplifycfg -S | grep "= phi double"
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
index 56f43b6..13ccad6 100644
--- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -simplifycfg -S > %t
-; RUN: not grep {^BB.tomerge} %t
-; RUN: grep {^BB.nomerge} %t | count 2
+; RUN: not grep "^BB.tomerge" %t
+; RUN: grep "^BB.nomerge" %t | count 2
 
 ; ModuleID = '<stdin>' 
 declare i1 @foo()
diff --git a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
index d025dee..9b6084f 100644
--- a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
+++ b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | grep {%outval = phi i32 .*mux}
+; RUN: opt < %s -simplifycfg -S | grep "%outval = phi i32 .*mux"
 ; PR2540
 ; Outval should end up with a select from 0/2, not all constants.
 
diff --git a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
deleted file mode 100644
index abf4455..0000000
--- a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
+++ /dev/null
@@ -1,569 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; END.
-	%struct..4._102 = type { %struct.QVectorData* }
-	%struct..5._125 = type { %struct.QMapData* }
-	%struct.QAbstractTextDocumentLayout = type { %struct.QObject }
-	%struct.QBasicAtomic = type { i32 }
-	%struct.QFont = type { %struct.QFontPrivate*, i32 }
-	%struct.QFontMetrics = type { %struct.QFontPrivate* }
-	%struct.QFontPrivate = type opaque
-	%"struct.QFragmentMap<QTextBlockData>" = type { %struct.QFragmentMapData }
-	%struct.QFragmentMapData = type { %"struct.QFragmentMapData::._154", i32 }
-	%"struct.QFragmentMapData::._154" = type { %"struct.QFragmentMapData::Header"* }
-	%"struct.QFragmentMapData::Header" = type { i32, i32, i32, i32, i32, i32, i32, i32 }
-	%"struct.QHash<uint,QHashDummyValue>" = type { %"struct.QHash<uint,QHashDummyValue>::._152" }
-	%"struct.QHash<uint,QHashDummyValue>::._152" = type { %struct.QHashData* }
-	%struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.QBasicAtomic, i32, i32, i16, i16, i32, i8 }
-	%"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 }
-	%"struct.QList<QObject*>::._92" = type { %struct.QListData }
-	%"struct.QList<QPointer<QObject> >" = type { %"struct.QList<QObject*>::._92" }
-	%struct.QListData = type { %"struct.QListData::Data"* }
-	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
-	%"struct.QMap<QUrl,QVariant>" = type { %struct..5._125 }
-	%struct.QMapData = type { %"struct.QMapData::Node"*, [12 x %"struct.QMapData::Node"*], %struct.QBasicAtomic, i32, i32, i32, i8 }
-	%"struct.QMapData::Node" = type { %"struct.QMapData::Node"*, [1 x %"struct.QMapData::Node"*] }
-	%struct.QObject = type { i32 (...)**, %struct.QObjectData* }
-	%struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QPointer<QObject> >", i8, [3 x i8], i32, i32 }
-	%struct.QObjectPrivate = type { %struct.QObjectData, i32, %struct.QObject*, %"struct.QList<QPointer<QObject> >", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %struct.QString }
-	%struct.QPaintDevice = type { i32 (...)**, i16 }
-	%struct.QPainter = type { %struct.QPainterPrivate* }
-	%struct.QPainterPrivate = type opaque
-	%struct.QPointF = type { double, double }
-	%struct.QPrinter = type { %struct.QPaintDevice, %struct.QPrinterPrivate* }
-	%struct.QPrinterPrivate = type opaque
-	%struct.QRectF = type { double, double, double, double }
-	%"struct.QSet<uint>" = type { %"struct.QHash<uint,QHashDummyValue>" }
-	%"struct.QSharedDataPointer<QTextFormatPrivate>" = type { %struct.QTextFormatPrivate* }
-	%struct.QString = type { %"struct.QString::Data"* }
-	%"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
-	%struct.QTextBlockFormat = type { %struct.QTextFormat }
-	%struct.QTextBlockGroup = type { %struct.QAbstractTextDocumentLayout }
-	%struct.QTextDocumentConfig = type { %struct.QString }
-	%struct.QTextDocumentPrivate = type { %struct.QObjectPrivate, %struct.QString, %"struct.QVector<QAbstractTextDocumentLayout::Selection>", i1, i32, i32, i1, i32, i32, i32, i32, i1, %struct.QTextFormatCollection, %struct.QTextBlockGroup*, %struct.QAbstractTextDocumentLayout*, %"struct.QFragmentMap<QTextBlockData>", %"struct.QFragmentMap<QTextBlockData>", i32, %"struct.QList<QPointer<QObject> >", %"struct.QList<QPointer<QObject> >", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %struct.QTextDocumentConfig, i1, i1, %struct.QPointF }
-	%struct.QTextFormat = type { %"struct.QSharedDataPointer<QTextFormatPrivate>", i32 }
-	%struct.QTextFormatCollection = type { %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QSet<uint>", %struct.QFont }
-	%struct.QTextFormatPrivate = type opaque
-	%"struct.QVector<QAbstractTextDocumentLayout::Selection>" = type { %struct..4._102 }
-	%struct.QVectorData = type { %struct.QBasicAtomic, i32, i32, i8 }
-
-define void @_ZNK13QTextDocument5printEP8QPrinter(%struct.QAbstractTextDocumentLayout* %this, %struct.QPrinter* %printer) {
-entry:
-	%tmp = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=2]
-	%tmp.upgrd.1 = alloca %struct.QRectF, align 16		; <%struct.QRectF*> [#uses=5]
-	%tmp2 = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=3]
-	%tmp.upgrd.2 = alloca %struct.QFontMetrics, align 16		; <%struct.QFontMetrics*> [#uses=4]
-	%tmp.upgrd.3 = alloca %struct.QFont, align 16		; <%struct.QFont*> [#uses=4]
-	%tmp3 = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=2]
-	%p = alloca %struct.QPainter, align 16		; <%struct.QPainter*> [#uses=14]
-	%body = alloca %struct.QRectF, align 16		; <%struct.QRectF*> [#uses=9]
-        %foo = alloca double, align 8
-        %bar = alloca double, align 8
-	%pageNumberPos = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=4]
-	%scaledPageSize = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=6]
-	%printerPageSize = alloca %struct.QPointF, align 16		; <%struct.QPointF*> [#uses=3]
-	%fmt = alloca %struct.QTextBlockFormat, align 16		; <%struct.QTextBlockFormat*> [#uses=5]
-	%font = alloca %struct.QFont, align 16		; <%struct.QFont*> [#uses=5]
-	%tmp.upgrd.4 = call %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv( %struct.QAbstractTextDocumentLayout* %this )		; <%struct.QTextDocumentPrivate*> [#uses=5]
-	%tmp.upgrd.5 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	call void @_ZN8QPainterC1EP12QPaintDevice( %struct.QPainter* %p, %struct.QPaintDevice* %tmp.upgrd.5 )
-	%tmp.upgrd.6 = invoke i1 @_ZNK8QPainter8isActiveEv( %struct.QPainter* %p )
-			to label %invcont unwind label %cleanup329		; <i1> [#uses=1]
-invcont:		; preds = %entry
-	br i1 %tmp.upgrd.6, label %cond_next, label %cleanup328
-cond_next:		; preds = %invcont
-	%tmp8 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
-			to label %invcont7 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=0]
-invcont7:		; preds = %cond_next
-	%tmp10 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp, double 0.000000e+00, double 0.000000e+00 )
-	call void @_ZN6QRectFC1ERK7QPointFRK6QSizeF( %struct.QRectF* %body, %struct.QPointF* %tmp, %struct.QPointF* %tmp10 )
-	call void @_ZN7QPointFC1Ev( %struct.QPointF* %pageNumberPos )
-	%tmp12 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	%tmp13 = call i1 @_ZNK6QSizeF7isValidEv( %struct.QPointF* %tmp12 )		; <i1> [#uses=1]
-	br i1 %tmp13, label %cond_next15, label %bb
-cond_next15:		; preds = %invcont7
-	%tmp17 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26		; <%struct.QPointF*> [#uses=1]
-	%tmp.upgrd.7 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %tmp17 )		; <double> [#uses=1]
-	%tmp18 = fcmp oeq double %tmp.upgrd.7, 0x41DFFFFFFFC00000		; <i1> [#uses=1]
-	br i1 %tmp18, label %bb, label %cond_next20
-cond_next20:		; preds = %cond_next15
-	br label %bb21
-bb:		; preds = %cond_next15, %invcont7
-	br label %bb21
-bb21:		; preds = %bb, %cond_next20
-	%iftmp.406.0 = phi i1 [ false, %bb ], [ true, %cond_next20 ]		; <i1> [#uses=1]
-	br i1 %iftmp.406.0, label %cond_true24, label %cond_false
-cond_true24:		; preds = %bb21
-	%tmp.upgrd.8 = invoke i32 @_Z13qt_defaultDpiv( )
-			to label %invcont25 unwind label %cleanup329		; <i32> [#uses=1]
-invcont25:		; preds = %cond_true24
-	%tmp26 = sitofp i32 %tmp.upgrd.8 to double		; <double> [#uses=2]
-	%tmp30 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
-			to label %invcont29 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
-invcont29:		; preds = %invcont25
-	%tmp32 = invoke %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv( %struct.QAbstractTextDocumentLayout* %tmp30 )
-			to label %invcont31 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=3]
-invcont31:		; preds = %invcont29
-	%tmp34 = icmp eq %struct.QPaintDevice* %tmp32, null		; <i1> [#uses=1]
-	br i1 %tmp34, label %cond_next42, label %cond_true35
-cond_true35:		; preds = %invcont31
-	%tmp38 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp32 )
-			to label %invcont37 unwind label %cleanup329		; <i32> [#uses=1]
-invcont37:		; preds = %cond_true35
-	%tmp38.upgrd.9 = sitofp i32 %tmp38 to double		; <double> [#uses=1]
-	%tmp41 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp32 )
-			to label %invcont40 unwind label %cleanup329		; <i32> [#uses=1]
-invcont40:		; preds = %invcont37
-	%tmp41.upgrd.10 = sitofp i32 %tmp41 to double		; <double> [#uses=1]
-	br label %cond_next42
-cond_next42:		; preds = %invcont40, %invcont31
-	%sourceDpiY.2 = phi double [ %tmp41.upgrd.10, %invcont40 ], [ %tmp26, %invcont31 ]		; <double> [#uses=1]
-	%sourceDpiX.2 = phi double [ %tmp38.upgrd.9, %invcont40 ], [ %tmp26, %invcont31 ]		; <double> [#uses=1]
-	%tmp44 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp46 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp44 )
-			to label %invcont45 unwind label %cleanup329		; <i32> [#uses=1]
-invcont45:		; preds = %cond_next42
-	%tmp46.upgrd.11 = sitofp i32 %tmp46 to double		; <double> [#uses=1]
-	%tmp48 = fdiv double %tmp46.upgrd.11, %sourceDpiX.2		; <double> [#uses=2]
-	%tmp50 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp52 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp50 )
-			to label %invcont51 unwind label %cleanup329		; <i32> [#uses=1]
-invcont51:		; preds = %invcont45
-	%tmp52.upgrd.12 = sitofp i32 %tmp52 to double		; <double> [#uses=1]
-	%tmp54 = fdiv double %tmp52.upgrd.12, %sourceDpiY.2		; <double> [#uses=2]
-	invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp48, double %tmp54 )
-			to label %invcont57 unwind label %cleanup329
-invcont57:		; preds = %invcont51
-	%tmp.upgrd.13 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp60 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 0		; <double*> [#uses=1]
-	%tmp61 = load double* %tmp60		; <double> [#uses=1]
-	store double %tmp61, double* %tmp.upgrd.13
-	%tmp62 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp63 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 1		; <double*> [#uses=1]
-	%tmp64 = load double* %tmp63		; <double> [#uses=1]
-	store double %tmp64, double* %tmp62
-	%tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
-	%tmp67 = load double* %tmp65		; <double> [#uses=1]
-	%tmp69 = fmul double %tmp67, %tmp48		; <double> [#uses=1]
-	store double %tmp69, double* %tmp65
-	%tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
-	%tmp73 = load double* %tmp71		; <double> [#uses=1]
-	%tmp75 = fmul double %tmp73, %tmp54		; <double> [#uses=1]
-	store double %tmp75, double* %tmp71
-	%tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 )
-			to label %invcont79 unwind label %cleanup329		; <i32> [#uses=1]
-invcont79:		; preds = %invcont57
-	%tmp82 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
-	%tmp84 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp82 )
-			to label %invcont83 unwind label %cleanup329		; <i32> [#uses=1]
-invcont83:		; preds = %invcont79
-	%tmp80.upgrd.14 = sitofp i32 %tmp80 to double		; <double> [#uses=1]
-	%tmp84.upgrd.15 = sitofp i32 %tmp84 to double		; <double> [#uses=1]
-	call void @_ZN6QSizeFC1Edd( %struct.QPointF* %printerPageSize, double %tmp84.upgrd.15, double %tmp80.upgrd.14 )
-	%tmp85 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %printerPageSize )		; <double> [#uses=1]
-	%tmp86 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %scaledPageSize )		; <double> [#uses=1]
-	%tmp87 = fdiv double %tmp85, %tmp86		; <double> [#uses=1]
-	%tmp88 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %printerPageSize )		; <double> [#uses=1]
-	%tmp89 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %scaledPageSize )		; <double> [#uses=1]
-	%tmp90 = fdiv double %tmp88, %tmp89		; <double> [#uses=1]
-	invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp90, double %tmp87 )
-			to label %cond_next194 unwind label %cleanup329
-cond_false:		; preds = %bb21
-	%tmp.upgrd.16 = getelementptr %struct.QAbstractTextDocumentLayout* %this, i32 0, i32 0		; <%struct.QObject*> [#uses=1]
-	%tmp95 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject( %struct.QAbstractTextDocumentLayout* %this, %struct.QObject* %tmp.upgrd.16 )
-			to label %invcont94 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=9]
-invcont94:		; preds = %cond_false
-	%tmp99 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont98 unwind label %cleanup329		; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
-invcont98:		; preds = %invcont94
-	%tmp101 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont100 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=1]
-invcont100:		; preds = %invcont98
-	invoke void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice( %struct.QAbstractTextDocumentLayout* %tmp99, %struct.QPaintDevice* %tmp101 )
-			to label %invcont103 unwind label %cleanup329
-invcont103:		; preds = %invcont100
-	%tmp105 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont104 unwind label %cleanup329		; <%struct.QPaintDevice*> [#uses=1]
-invcont104:		; preds = %invcont103
-	%tmp107 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp105 )
-			to label %invcont106 unwind label %cleanup329		; <i32> [#uses=1]
-invcont106:		; preds = %invcont104
-	%tmp108 = sitofp i32 %tmp107 to double		; <double> [#uses=1]
-	%tmp109 = fmul double %tmp108, 0x3FE93264C993264C		; <double> [#uses=1]
-	%tmp109.upgrd.17 = fptosi double %tmp109 to i32		; <i32> [#uses=3]
-	%tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 )		; <%struct.QTextBlockGroup*> [#uses=1]
-	invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret  %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 )
-			to label %invcont111 unwind label %cleanup329
-invcont111:		; preds = %invcont106
-	%tmp112 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	invoke void @_ZN16QTextFrameFormat9setMarginEd( %struct.QTextBlockFormat* %fmt, double %tmp112 )
-			to label %invcont114 unwind label %cleanup192
-invcont114:		; preds = %invcont111
-	%tmp116 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 )		; <%struct.QTextBlockGroup*> [#uses=1]
-	invoke void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat( %struct.QTextBlockGroup* %tmp116, %struct.QTextBlockFormat* %fmt )
-			to label %invcont117 unwind label %cleanup192
-invcont117:		; preds = %invcont114
-	%tmp119 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont118 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont118:		; preds = %invcont117
-	%tmp121 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp119 )
-			to label %invcont120 unwind label %cleanup192		; <i32> [#uses=1]
-invcont120:		; preds = %invcont118
-	%tmp121.upgrd.19 = sitofp i32 %tmp121 to double		; <double> [#uses=1]
-	%tmp123 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont122 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont122:		; preds = %invcont120
-	%tmp125 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp123 )
-			to label %invcont124 unwind label %cleanup192		; <i32> [#uses=1]
-invcont124:		; preds = %invcont122
-	%tmp125.upgrd.20 = sitofp i32 %tmp125 to double		; <double> [#uses=1]
-	call void @_ZN6QRectFC1Edddd( %struct.QRectF* %tmp.upgrd.1, double 0.000000e+00, double 0.000000e+00, double %tmp125.upgrd.20, double %tmp121.upgrd.19 )
-	%tmp126 = getelementptr %struct.QRectF* %body, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp127 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp128 = load double* %tmp127		; <double> [#uses=1]
-	store double %tmp128, double* %tmp126
-	%tmp129 = getelementptr %struct.QRectF* %body, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp130 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp131 = load double* %tmp130		; <double> [#uses=1]
-	store double %tmp131, double* %tmp129
-	%tmp132 = getelementptr %struct.QRectF* %body, i32 0, i32 2		; <double*> [#uses=1]
-	%tmp133 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 2		; <double*> [#uses=1]
-	%tmp134 = load double* %tmp133		; <double> [#uses=1]
-	store double %tmp134, double* %tmp132
-	%tmp135 = getelementptr %struct.QRectF* %body, i32 0, i32 3		; <double*> [#uses=1]
-	%tmp136 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 3		; <double*> [#uses=1]
-	%tmp137 = load double* %tmp136		; <double> [#uses=1]
-	store double %tmp137, double* %tmp135
-	%tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body )		; <double> [#uses=1]
-	%tmp139 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp140 = fsub double %tmp138, %tmp139		; <double> [#uses=1]
-	%tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont141 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
-invcont141:		; preds = %invcont124
-	invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret  %tmp.upgrd.3, %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont144 unwind label %cleanup192
-invcont144:		; preds = %invcont141
-	invoke void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice( %struct.QFontMetrics* %tmp.upgrd.2, %struct.QFont* %tmp.upgrd.3, %struct.QPaintDevice* %tmp142 )
-			to label %invcont146 unwind label %cleanup173
-invcont146:		; preds = %invcont144
-	%tmp149 = invoke i32 @_ZNK12QFontMetrics6ascentEv( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %invcont148 unwind label %cleanup168		; <i32> [#uses=1]
-invcont148:		; preds = %invcont146
-	%tmp149.upgrd.21 = sitofp i32 %tmp149 to double		; <double> [#uses=1]
-	%tmp150 = fadd double %tmp140, %tmp149.upgrd.21		; <double> [#uses=1]
-	%tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
-			to label %invcont151 unwind label %cleanup168		; <%struct.QPaintDevice*> [#uses=1]
-invcont151:		; preds = %invcont148
-	%tmp154 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp152 )
-			to label %invcont153 unwind label %cleanup168		; <i32> [#uses=1]
-invcont153:		; preds = %invcont151
-	%tmp155 = mul i32 %tmp154, 5		; <i32> [#uses=1]
-	%tmp156 = sdiv i32 %tmp155, 72		; <i32> [#uses=1]
-	%tmp156.upgrd.22 = sitofp i32 %tmp156 to double		; <double> [#uses=1]
-	%tmp157 = fadd double %tmp150, %tmp156.upgrd.22		; <double> [#uses=1]
-	%tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body )		; <double> [#uses=1]
-	%tmp159 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp160 = fsub double %tmp158, %tmp159		; <double> [#uses=1]
-	call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 )
-	%tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp163 = load double* %tmp162		; <double> [#uses=1]
-	store double %tmp163, double* %tmp161
-	%tmp164 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp165 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp166 = load double* %tmp165		; <double> [#uses=1]
-	store double %tmp166, double* %tmp164
-	invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %cleanup171 unwind label %cleanup173
-cleanup168:		; preds = %invcont151, %invcont148, %invcont146
-        %val168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
-			to label %cleanup173 unwind label %cleanup173
-cleanup171:		; preds = %invcont153
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
-			to label %finally170 unwind label %cleanup192
-cleanup173:		; preds = %cleanup168, %cleanup168, %invcont153, %invcont144
-        %val173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
-			to label %cleanup192 unwind label %cleanup192
-finally170:		; preds = %cleanup171
-	invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret  %font, %struct.QAbstractTextDocumentLayout* %tmp95 )
-			to label %invcont177 unwind label %cleanup192
-invcont177:		; preds = %finally170
-	invoke void @_ZN5QFont12setPointSizeEi( %struct.QFont* %font, i32 10 )
-			to label %invcont179 unwind label %cleanup187
-invcont179:		; preds = %invcont177
-	invoke void @_ZN13QTextDocument14setDefaultFontERK5QFont( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QFont* %font )
-			to label %invcont181 unwind label %cleanup187
-invcont181:		; preds = %invcont179
-	call void @_ZNK6QRectF4sizeEv( %struct.QPointF* sret  %tmp3, %struct.QRectF* %body )
-	invoke void @_ZN13QTextDocument11setPageSizeERK6QSizeF( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QPointF* %tmp3 )
-			to label %cleanup185 unwind label %cleanup187
-cleanup185:		; preds = %invcont181
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
-			to label %cleanup190 unwind label %cleanup192
-cleanup187:		; preds = %invcont181, %invcont179, %invcont177
-        %val187 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
-			to label %cleanup192 unwind label %cleanup192
-cleanup190:		; preds = %cleanup185
-	invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
-			to label %cond_next194 unwind label %cleanup329
-cleanup192:		; preds = %cleanup187, %cleanup187, %cleanup185, %finally170, %cleanup173, %cleanup173, %cleanup171, %invcont141, %invcont124, %invcont122, %invcont120, %invcont118, %invcont117, %invcont114, %invcont111
-        %val192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
-			to label %cleanup329 unwind label %cleanup329
-cond_next194:		; preds = %cleanup190, %invcont83
-	%clonedDoc.1 = phi %struct.QAbstractTextDocumentLayout* [ null, %invcont83 ], [ %tmp95, %cleanup190 ]		; <%struct.QAbstractTextDocumentLayout*> [#uses=3]
-	%doc.1 = phi %struct.QAbstractTextDocumentLayout* [ %this, %invcont83 ], [ %tmp95, %cleanup190 ]		; <%struct.QAbstractTextDocumentLayout*> [#uses=2]
-	%tmp197 = invoke i1 @_ZNK8QPrinter13collateCopiesEv( %struct.QPrinter* %printer )
-			to label %invcont196 unwind label %cleanup329		; <i1> [#uses=1]
-invcont196:		; preds = %cond_next194
-	br i1 %tmp197, label %cond_true200, label %cond_false204
-cond_true200:		; preds = %invcont196
-	%tmp2000 = load double* %foo
-	store double %tmp2000, double* %bar
-	%tmp203 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
-			to label %cond_next208 unwind label %cleanup329		; <i32> [#uses=1]
-cond_false204:		; preds = %invcont196
-	%tmp2001 = load double* %foo
-	store double %tmp2001, double* %bar
-	%tmp207 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
-			to label %cond_next208 unwind label %cleanup329		; <i32> [#uses=1]
-cond_next208:		; preds = %invcont206, %invcont202
-	%pageCopies.0 = phi i32 [ %tmp203, %cond_true200 ], [ 1, %cond_false204 ]		; <i32> [#uses=2]
-	%docCopies.0 = phi i32 [ 1, %cond_true200 ], [ %tmp207, %cond_false204 ]		; <i32> [#uses=2]
-	%tmp211 = invoke i32 @_ZNK8QPrinter8fromPageEv( %struct.QPrinter* %printer )
-			to label %invcont210 unwind label %cleanup329		; <i32> [#uses=3]
-invcont210:		; preds = %cond_next208
-	%tmp214 = invoke i32 @_ZNK8QPrinter6toPageEv( %struct.QPrinter* %printer )
-			to label %invcont213 unwind label %cleanup329		; <i32> [#uses=3]
-invcont213:		; preds = %invcont210
-	%tmp216 = icmp eq i32 %tmp211, 0		; <i1> [#uses=1]
-	br i1 %tmp216, label %cond_true217, label %cond_next225
-cond_true217:		; preds = %invcont213
-	%tmp219 = icmp eq i32 %tmp214, 0		; <i1> [#uses=1]
-	br i1 %tmp219, label %cond_true220, label %cond_next225
-cond_true220:		; preds = %cond_true217
-	%tmp223 = invoke i32 @_ZNK13QTextDocument9pageCountEv( %struct.QAbstractTextDocumentLayout* %doc.1 )
-			to label %invcont222 unwind label %cleanup329		; <i32> [#uses=1]
-invcont222:		; preds = %cond_true220
-	br label %cond_next225
-cond_next225:		; preds = %invcont222, %cond_true217, %invcont213
-	%toPage.1 = phi i32 [ %tmp223, %invcont222 ], [ %tmp214, %cond_true217 ], [ %tmp214, %invcont213 ]		; <i32> [#uses=2]
-	%fromPage.1 = phi i32 [ 1, %invcont222 ], [ %tmp211, %cond_true217 ], [ %tmp211, %invcont213 ]		; <i32> [#uses=2]
-	%tmp.page = invoke i32 @_ZNK8QPrinter9pageOrderEv( %struct.QPrinter* %printer )
-			to label %invcont227 unwind label %cleanup329		; <i32> [#uses=1]
-invcont227:		; preds = %cond_next225
-	%tmp228 = icmp eq i32 %tmp.page, 1		; <i1> [#uses=1]
-	br i1 %tmp228, label %cond_true230, label %cond_next234
-cond_true230:		; preds = %invcont227
-	br label %cond_next234
-cond_next234:		; preds = %cond_true230, %invcont227
-	%ascending.1 = phi i1 [ false, %cond_true230 ], [ true, %invcont227 ]		; <i1> [#uses=1]
-	%toPage.2 = phi i32 [ %fromPage.1, %cond_true230 ], [ %toPage.1, %invcont227 ]		; <i32> [#uses=1]
-	%fromPage.2 = phi i32 [ %toPage.1, %cond_true230 ], [ %fromPage.1, %invcont227 ]		; <i32> [#uses=1]
-	br label %bb309
-bb237:		; preds = %cond_true313, %cond_next293
-	%iftmp.410.4 = phi i1 [ %iftmp.410.5, %cond_true313 ], [ %iftmp.410.1, %cond_next293 ]		; <i1> [#uses=1]
-	%page.4 = phi i32 [ %fromPage.2, %cond_true313 ], [ %page.3, %cond_next293 ]		; <i32> [#uses=4]
-	br label %bb273
-invcont240:		; preds = %cond_true277
-	%tmp242 = icmp eq i32 %tmp241, 2		; <i1> [#uses=1]
-	br i1 %tmp242, label %bb252, label %cond_next244
-cond_next244:		; preds = %invcont240
-	%tmp247 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
-			to label %invcont246 unwind label %cleanup329		; <i32> [#uses=1]
-invcont246:		; preds = %cond_next244
-	%tmp248 = icmp eq i32 %tmp247, 3		; <i1> [#uses=1]
-	br i1 %tmp248, label %bb252, label %bb253
-bb252:		; preds = %invcont246, %invcont240
-	br label %bb254
-bb253:		; preds = %invcont246
-	br label %bb254
-bb254:		; preds = %bb253, %bb252
-	%iftmp.410.0 = phi i1 [ true, %bb252 ], [ false, %bb253 ]		; <i1> [#uses=2]
-	br i1 %iftmp.410.0, label %UserCanceled, label %cond_next258
-cond_next258:		; preds = %bb254
-	invoke fastcc void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF( i32 %page.4, %struct.QPainter* %p, %struct.QAbstractTextDocumentLayout* %doc.1, %struct.QRectF* %body, %struct.QPointF* %pageNumberPos )
-			to label %invcont261 unwind label %cleanup329
-invcont261:		; preds = %cond_next258
-	%tmp263 = add i32 %pageCopies.0, -1		; <i32> [#uses=1]
-	%tmp265 = icmp sgt i32 %tmp263, %j.4		; <i1> [#uses=1]
-	br i1 %tmp265, label %cond_true266, label %cond_next270
-cond_true266:		; preds = %invcont261
-	%tmp269 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %cond_next270 unwind label %cleanup329		; <i1> [#uses=0]
-cond_next270:		; preds = %cond_true266, %invcont261
-	%tmp272 = add i32 %j.4, 1		; <i32> [#uses=1]
-	br label %bb273
-bb273:		; preds = %cond_next270, %bb237
-	%iftmp.410.1 = phi i1 [ %iftmp.410.4, %bb237 ], [ %iftmp.410.0, %cond_next270 ]		; <i1> [#uses=2]
-	%j.4 = phi i32 [ 0, %bb237 ], [ %tmp272, %cond_next270 ]		; <i32> [#uses=3]
-	%tmp276 = icmp slt i32 %j.4, %pageCopies.0		; <i1> [#uses=1]
-	br i1 %tmp276, label %cond_true277, label %bb280
-cond_true277:		; preds = %bb273
-	%tmp241 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
-			to label %invcont240 unwind label %cleanup329		; <i32> [#uses=1]
-bb280:		; preds = %bb273
-	%tmp283 = icmp eq i32 %page.4, %toPage.2		; <i1> [#uses=1]
-	br i1 %tmp283, label %bb297, label %cond_next285
-cond_next285:		; preds = %bb280
-	br i1 %ascending.1, label %cond_true287, label %cond_false290
-cond_true287:		; preds = %cond_next285
-	%tmp289 = add i32 %page.4, 1		; <i32> [#uses=1]
-	br label %cond_next293
-cond_false290:		; preds = %cond_next285
-	%tmp292 = add i32 %page.4, -1		; <i32> [#uses=1]
-	br label %cond_next293
-cond_next293:		; preds = %cond_false290, %cond_true287
-	%page.3 = phi i32 [ %tmp289, %cond_true287 ], [ %tmp292, %cond_false290 ]		; <i32> [#uses=1]
-	%tmp296 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %bb237 unwind label %cleanup329		; <i1> [#uses=0]
-bb297:		; preds = %bb280
-	%tmp299 = add i32 %docCopies.0, -1		; <i32> [#uses=1]
-	%tmp301 = icmp sgt i32 %tmp299, %i.1		; <i1> [#uses=1]
-	br i1 %tmp301, label %cond_true302, label %cond_next306
-cond_true302:		; preds = %bb297
-	%tmp305 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
-			to label %cond_next306 unwind label %cleanup329		; <i1> [#uses=0]
-cond_next306:		; preds = %cond_true302, %bb297
-	%tmp308 = add i32 %i.1, 1		; <i32> [#uses=1]
-	br label %bb309
-bb309:		; preds = %cond_next306, %cond_next234
-	%iftmp.410.5 = phi i1 [ undef, %cond_next234 ], [ %iftmp.410.1, %cond_next306 ]		; <i1> [#uses=1]
-	%i.1 = phi i32 [ 0, %cond_next234 ], [ %tmp308, %cond_next306 ]		; <i32> [#uses=3]
-	%tmp312 = icmp slt i32 %i.1, %docCopies.0		; <i1> [#uses=1]
-	br i1 %tmp312, label %cond_true313, label %UserCanceled
-cond_true313:		; preds = %bb309
-	br label %bb237
-UserCanceled:		; preds = %bb309, %bb254
-	%tmp318 = icmp eq %struct.QAbstractTextDocumentLayout* %clonedDoc.1, null		; <i1> [#uses=1]
-	br i1 %tmp318, label %cleanup327, label %cond_true319
-cond_true319:		; preds = %UserCanceled
-	%tmp.upgrd.23 = getelementptr %struct.QAbstractTextDocumentLayout* %clonedDoc.1, i32 0, i32 0, i32 0		; <i32 (...)***> [#uses=1]
-	%tmp.upgrd.24 = load i32 (...)*** %tmp.upgrd.23		; <i32 (...)**> [#uses=1]
-	%tmp322 = getelementptr i32 (...)** %tmp.upgrd.24, i32 4		; <i32 (...)**> [#uses=1]
-	%tmp.upgrd.25 = load i32 (...)** %tmp322		; <i32 (...)*> [#uses=1]
-	%tmp.upgrd.26 = bitcast i32 (...)* %tmp.upgrd.25 to void (%struct.QAbstractTextDocumentLayout*)*		; <void (%struct.QAbstractTextDocumentLayout*)*> [#uses=1]
-	invoke void %tmp.upgrd.26( %struct.QAbstractTextDocumentLayout* %clonedDoc.1 )
-			to label %cleanup327 unwind label %cleanup329
-cleanup327:		; preds = %cond_true319, %UserCanceled
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	ret void
-cleanup328:		; preds = %invcont
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	ret void
-cleanup329:		; preds = %cond_true319, %cond_true302, %cond_next293, %cond_true277, %cond_true266, %cond_next258, %cond_next244, %cond_next225, %cond_true220, %invcont210, %cond_next208, %cond_false204, %cond_true200, %cond_next194, %cleanup192, %cleanup192, %cleanup190, %invcont106, %invcont104, %invcont103, %invcont100, %invcont98, %invcont94, %cond_false, %invcont83, %invcont79, %invcont57, %invcont51, %invcont45, %cond_next42, %invcont37, %cond_true35, %invcont29, %invcont25, %cond_true24, %cond_next, %entry
-        %val329 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-                    cleanup
-	call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
-	resume { i8*, i32 } %val329
-}
-
-declare void @_ZN6QSizeFC1Edd(%struct.QPointF*, double, double)
-
-declare i1 @_ZNK6QSizeF7isValidEv(%struct.QPointF*)
-
-declare double @_ZNK6QSizeF5widthEv(%struct.QPointF*)
-
-declare double @_ZNK6QSizeF6heightEv(%struct.QPointF*)
-
-declare double* @_ZN6QSizeF6rwidthEv(%struct.QPointF*)
-
-declare double* @_ZN6QSizeF7rheightEv(%struct.QPointF*)
-
-declare %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN7QPointFC1Ev(%struct.QPointF*)
-
-declare void @_ZN7QPointFC1Edd(%struct.QPointF*, double, double)
-
-declare void @_ZN16QTextFrameFormat9setMarginEd(%struct.QTextBlockFormat*, double)
-
-declare void @_ZN6QRectFC1Edddd(%struct.QRectF*, double, double, double, double)
-
-declare void @_ZN6QRectFC1ERK7QPointFRK6QSizeF(%struct.QRectF*, %struct.QPointF*, %struct.QPointF*)
-
-declare double @_ZNK6QRectF5widthEv(%struct.QRectF*)
-
-declare double @_ZNK6QRectF6heightEv(%struct.QRectF*)
-
-declare void @_ZNK6QRectF4sizeEv(%struct.QPointF*, %struct.QRectF*)
-
-declare void @_ZN16QTextFrameFormatD1Ev(%struct.QTextBlockFormat*)
-
-declare void @_ZNK10QTextFrame11frameFormatEv(%struct.QTextBlockFormat*, %struct.QTextBlockGroup*)
-
-declare void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat(%struct.QTextBlockGroup*, %struct.QTextBlockFormat*)
-
-declare i32 @_ZNK12QPaintDevice5widthEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice6heightEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice11logicalDpiXEv(%struct.QPaintDevice*)
-
-declare i32 @_ZNK12QPaintDevice11logicalDpiYEv(%struct.QPaintDevice*)
-
-declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject(%struct.QAbstractTextDocumentLayout*, %struct.QObject*)
-
-declare void @_ZN5QFontD1Ev(%struct.QFont*)
-
-declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv(%struct.QAbstractTextDocumentLayout*)
-
-declare %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv(%struct.QAbstractTextDocumentLayout*)
-
-declare i32 @_ZNK13QTextDocument9pageCountEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZNK13QTextDocument11defaultFontEv(%struct.QFont*, %struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN13QTextDocument14setDefaultFontERK5QFont(%struct.QAbstractTextDocumentLayout*, %struct.QFont*)
-
-declare void @_ZN13QTextDocument11setPageSizeERK6QSizeF(%struct.QAbstractTextDocumentLayout*, %struct.QPointF*)
-
-declare void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF(i32, %struct.QPainter*, %struct.QAbstractTextDocumentLayout*, %struct.QRectF*, %struct.QPointF*)
-
-declare void @_ZN12QFontMetricsD1Ev(%struct.QFontMetrics*)
-
-declare void @_ZN8QPainterC1EP12QPaintDevice(%struct.QPainter*, %struct.QPaintDevice*)
-
-declare i1 @_ZNK8QPainter8isActiveEv(%struct.QPainter*)
-
-declare i32 @_Z13qt_defaultDpiv()
-
-declare %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv(%struct.QAbstractTextDocumentLayout*)
-
-declare void @_ZN8QPainter5scaleEdd(%struct.QPainter*, double, double)
-
-declare %struct.QPaintDevice* @_ZNK8QPainter6deviceEv(%struct.QPainter*)
-
-declare void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice(%struct.QAbstractTextDocumentLayout*, %struct.QPaintDevice*)
-
-declare void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice(%struct.QFontMetrics*, %struct.QFont*, %struct.QPaintDevice*)
-
-declare i32 @_ZNK12QFontMetrics6ascentEv(%struct.QFontMetrics*)
-
-declare void @_ZN5QFont12setPointSizeEi(%struct.QFont*, i32)
-
-declare i1 @_ZNK8QPrinter13collateCopiesEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter9numCopiesEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter8fromPageEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter6toPageEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter9pageOrderEv(%struct.QPrinter*)
-
-declare i32 @_ZNK8QPrinter12printerStateEv(%struct.QPrinter*)
-
-declare i1 @_ZN8QPrinter7newPageEv(%struct.QPrinter*)
-
-declare void @_ZN8QPainterD1Ev(%struct.QPainter*)
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SimplifyCFG/BrUnwind.ll b/test/Transforms/SimplifyCFG/BrUnwind.ll
index 7ab8faa..1485364 100644
--- a/test/Transforms/SimplifyCFG/BrUnwind.ll
+++ b/test/Transforms/SimplifyCFG/BrUnwind.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN: not grep {br label}
+; RUN: not grep "br label"
 
 define void @test(i1 %C) {
         br i1 %C, label %A, label %B
diff --git a/test/Transforms/SimplifyCFG/DeadSetCC.ll b/test/Transforms/SimplifyCFG/DeadSetCC.ll
index 8339462..c625600 100644
--- a/test/Transforms/SimplifyCFG/DeadSetCC.ll
+++ b/test/Transforms/SimplifyCFG/DeadSetCC.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep {icmp eq}
+; RUN:   not grep "icmp eq"
 
 ; Check that simplifycfg deletes a dead 'seteq' instruction when it
 ; folds a conditional branch into a switch instruction.
diff --git a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
index bf9d953..b6d54d3 100644
--- a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
+++ b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
@@ -3,7 +3,7 @@
 ; important case.  This is basically the most trivial form of tail-duplication.
 
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:    not grep {br label}
+; RUN:    not grep "br label"
 
 define i32 @test(i1 %B, i32 %A, i32 %B.upgrd.1) {
         br i1 %B, label %T, label %F
diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll
index 2b29681..7097dea 100644
--- a/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -17,3 +17,54 @@ b:
 c:
         ret void
 }
+
+; rdar://10554090
+define zeroext i1 @test2(i64 %i0, i64 %i1) nounwind uwtable readonly ssp {
+entry:
+; CHECK: test2
+; CHECK: br i1
+  %and.i.i = and i64 %i0, 281474976710655
+  %and.i11.i = and i64 %i1, 281474976710655
+  %or.cond = icmp eq i64 %and.i.i, %and.i11.i
+  br i1 %or.cond, label %c, label %a
+
+a:
+; CHECK: br
+  %shr.i4.i = lshr i64 %i0, 48
+  %and.i5.i = and i64 %shr.i4.i, 32767
+  %shr.i.i = lshr i64 %i1, 48
+  %and.i2.i = and i64 %shr.i.i, 32767
+  %cmp9.i = icmp ult i64 %and.i5.i, %and.i2.i
+  br i1 %cmp9.i, label %c, label %b
+
+b:
+; CHECK-NOT: br
+  %shr.i13.i9 = lshr i64 %i1, 48
+  %and.i14.i10 = and i64 %shr.i13.i9, 32767
+  %shr.i.i11 = lshr i64 %i0, 48
+  %and.i11.i12 = and i64 %shr.i.i11, 32767
+  %phitmp = icmp uge i64 %and.i14.i10, %and.i11.i12
+  br label %c
+
+c:
+  %o2 = phi i1 [ false, %a ], [ %phitmp, %b ], [ false, %entry ]
+  ret i1 %o2
+}
+
+; PR13180
+define void @pr13180(i8 %p) {
+entry:
+  %tobool = icmp eq i8 %p, 0
+  br i1 %tobool, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %phitmp = icmp eq i8 %p, 0
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ undef, %cond.true ], [ %phitmp, %cond.false ]
+  unreachable
+}
diff --git a/test/Transforms/SimplifyCFG/branch-phi-thread.ll b/test/Transforms/SimplifyCFG/branch-phi-thread.ll
index f52d979..c19ba69 100644
--- a/test/Transforms/SimplifyCFG/branch-phi-thread.ll
+++ b/test/Transforms/SimplifyCFG/branch-phi-thread.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -simplifycfg -adce -S | \
-; RUN:   not grep {call void @f1}
+; RUN:   not grep "call void @f1"
 ; END.
 
 declare void @f1()
diff --git a/test/Transforms/SimplifyCFG/duplicate-phis.ll b/test/Transforms/SimplifyCFG/duplicate-phis.ll
index 5129f9fb..4788406 100644
--- a/test/Transforms/SimplifyCFG/duplicate-phis.ll
+++ b/test/Transforms/SimplifyCFG/duplicate-phis.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -simplifycfg -S | grep { = phi } | count 1
+; RUN: opt < %s -instcombine -simplifycfg -S | grep " = phi " | count 1
 
 ; instcombine should sort the PHI operands so that simplifycfg can see the
 ; duplicate and remove it.
diff --git a/test/Transforms/SimplifyCFG/invoke.ll b/test/Transforms/SimplifyCFG/invoke.ll
new file mode 100644
index 0000000..10dc41b
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/invoke.ll
@@ -0,0 +1,139 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare void @purefn() nounwind readnone
+declare i32 @read_only() nounwind readonly
+declare i32 @nounwind_fn() nounwind
+declare i32 @fn()
+
+
+; CHECK: @f1
+define i8* @f1() nounwind uwtable ssp {
+entry:
+; CHECK: call void @llvm.trap()
+; CHECK: unreachable
+  %call = invoke noalias i8* undef()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i8* %call
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %1 = extractvalue { i8*, i32 } %0, 0
+  tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f2
+define i8* @f2() nounwind uwtable ssp {
+entry:
+; CHECK: call void @llvm.trap()
+; CHECK: unreachable
+  %call = invoke noalias i8* null()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i8* %call
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %1 = extractvalue { i8*, i32 } %0, 0
+  tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f3
+define i32 @f3() nounwind uwtable ssp {
+; CHECK-NEXT: entry
+entry:
+; CHECK-NEXT: ret i32 3
+  %call = invoke i32 @read_only()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 3
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %1 = extractvalue { i8*, i32 } %0, 0
+  tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f4
+define i32 @f4() nounwind uwtable ssp {
+; CHECK-NEXT: entry
+entry:
+; CHECK-NEXT: call i32 @read_only()
+  %call = invoke i32 @read_only()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK-NEXT: ret i32 %call
+  ret i32 %call
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  %1 = extractvalue { i8*, i32 } %0, 0
+  tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f5
+define i32 @f5(i1 %cond, i8* %a, i8* %b) {
+entry:
+  br i1 %cond, label %x, label %y
+
+x:
+; CHECK: invoke i32 @fn()
+  %call = invoke i32 @fn()
+          to label %cont unwind label %lpad
+
+y:
+; CHECK: call i32 @nounwind_fn()
+  %call2 = invoke i32 @nounwind_fn()
+           to label %cont unwind label %lpad
+
+cont:
+; CHECK: phi i32
+; CHECK: ret i32 %phi
+  %phi = phi i32 [%call, %x], [%call2, %y]
+  ret i32 %phi
+
+lpad:
+; CHECK-NOT: phi
+  %phi2 = phi i8* [%a, %x], [%b, %y]
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+; CHECK: __cxa_call_unexpected(i8* %a)
+  tail call void @__cxa_call_unexpected(i8* %phi2) noreturn nounwind
+  unreachable
+}
+
+; CHECK: @f6
+define void @f6() {
+entry:
+  invoke void @purefn()
+          to label %invoke.cont1 unwind label %lpad
+
+invoke.cont1:
+  %foo = invoke i32 @fn()
+          to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  ret void
+
+lpad:
+; CHECK-NOT: phi
+  %tmp = phi i8* [ null, %invoke.cont1 ], [ null, %entry ]
+  landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/switch_thread.ll b/test/Transforms/SimplifyCFG/switch_thread.ll
index bd85fcc..9396684 100644
--- a/test/Transforms/SimplifyCFG/switch_thread.ll
+++ b/test/Transforms/SimplifyCFG/switch_thread.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep {call void @DEAD}
+; RUN:   not grep "call void @DEAD"
 
 ; Test that we can thread a simple known condition through switch statements.
 
diff --git a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
index 8e9f206..ae917f7 100644
--- a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
+++ b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
@@ -1,8 +1,8 @@
 ; PR1307
 ; RUN: opt < %s -simplify-libcalls -instcombine -S > %t
-; RUN: grep {@str,.*i64 3} %t
-; RUN: grep {@str1,.*i64 7} %t
-; RUN: grep {ret i8.*null} %t
+; RUN: grep "@str,.*i64 3" %t
+; RUN: grep "@str1,.*i64 7" %t
+; RUN: grep "ret i8.*null" %t
 ; END.
 
 @str = internal constant [5 x i8] c"foog\00"
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
index f8a0c88..2717228 100644
--- a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
@@ -1,14 +1,14 @@
-; RUN: opt < %s -simplify-libcalls -S > %t
-; RUN: grep nocapture %t | count 2
-; RUN: grep null %t | grep nocapture | count 1
-; RUN: grep null %t | grep call | not grep readonly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
 ; Test that we add nocapture to the declaration, and to the second call only.
 
+; CHECK: declare float @strtol(i8*, i8** nocapture, i32) nounwind
 declare float @strtol(i8* %s, i8** %endptr, i32 %base)
 
 define void @foo(i8* %x, i8** %endptr) {
+; CHECK:  call float @strtol(i8* %x, i8** %endptr, i32 10)
   call float @strtol(i8* %x, i8** %endptr, i32 10)
+; CHECK: %2 = call float @strtol(i8* nocapture %x, i8** null, i32 10)
   call float @strtol(i8* %x, i8** null, i32 10)
   ret void
 }
diff --git a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
index 9056499..f4c80ed 100644
--- a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -instcombine -S | grep {ret i32 -65}
+; RUN: opt < %s -simplify-libcalls -instcombine -S | grep "ret i32 -65"
 ; PR4284
 
 define i32 @test() nounwind {
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
index ab45f18..e38d783 100644
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ b/test/Transforms/SimplifyLibCalls/FFS.ll
@@ -1,6 +1,6 @@
 ; Test that the ToAsciiOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*@ffs}
+; RUN:   not grep "call.*@ffs"
 
 @non_const = external global i32		; <i32*> [#uses=1]
 
diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll
index 4a0d232..51733e4 100644
--- a/test/Transforms/SimplifyLibCalls/FPrintF.ll
+++ b/test/Transforms/SimplifyLibCalls/FPrintF.ll
@@ -1,6 +1,6 @@
 ; Test that the FPrintFOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*fprintf}
+; RUN:   not grep "call.*fprintf"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll
index 1f72ede..aa01aba 100644
--- a/test/Transforms/SimplifyLibCalls/FPuts.ll
+++ b/test/Transforms/SimplifyLibCalls/FPuts.ll
@@ -1,6 +1,6 @@
 ; Test that the FPutsOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*fputs}
+; RUN:   not grep "call.*fputs"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
index c711178..1faad03 100644
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -constprop -instcombine -S | not grep {call.*llvm.memcpy.i32}
+; RUN: opt < %s -constprop -instcombine -S | not grep "call.*llvm.memcpy.i32"
 
 @h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll
index 847e363..514a7d9 100644
--- a/test/Transforms/SimplifyLibCalls/SPrintF.ll
+++ b/test/Transforms/SimplifyLibCalls/SPrintF.ll
@@ -1,6 +1,6 @@
 ; Test that the SPrintFOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*sprintf}
+; RUN:   not grep "call.*sprintf"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/StpCpy.ll b/test/Transforms/SimplifyLibCalls/StpCpy.ll
new file mode 100644
index 0000000..914b095
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/StpCpy.ll
@@ -0,0 +1,43 @@
+; Test that the StpCpyOptimizer works correctly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+
+declare i8* @stpcpy(i8*, i8*)
+
+declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
+
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+
+define i32 @t1() {
+; CHECK: @t1
+  %target = alloca [1024 x i8]
+  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %rslt1 = call i8* @stpcpy( i8* %arg1, i8* %arg2 )
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+  ret i32 0
+}
+
+define i32 @t2() {
+; CHECK: @t2
+  %target = alloca [1024 x i8]
+  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
+  %rslt1 = call i8* @__stpcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
+; CHECK: @__memcpy_chk
+  ret i32 0
+}
+
+define i8* @t3(i8* %arg) {
+; CHECK: @t3
+  %stpcpy = tail call i8* @stpcpy(i8* %arg, i8* %arg)
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen(i8* %arg)
+; CHECK-NEXT: getelementptr inbounds i8* %arg, i32 [[LEN]]
+  ret i8* %stpcpy
+}
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll
index 4e3d0ab..3ea691a 100644
--- a/test/Transforms/SimplifyLibCalls/StrCat.ll
+++ b/test/Transforms/SimplifyLibCalls/StrCat.ll
@@ -1,9 +1,9 @@
 ; Test that the StrCatOptimizer works correctly
 ; PR3661
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*strcat}
+; RUN:   not grep "call.*strcat"
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   grep {puts.*%arg1}
+; RUN:   grep "puts.*%arg1"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll
index acd8aaf..4a20bbd 100644
--- a/test/Transforms/SimplifyLibCalls/StrLen.ll
+++ b/test/Transforms/SimplifyLibCalls/StrLen.ll
@@ -1,6 +1,6 @@
 ; Test that the StrCatOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:    not grep {call.*strlen}
+; RUN:    not grep "call.*strlen"
 
 target datalayout = "e-p:32:32"
 @hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=3]
diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll
index d09c022..073792b 100644
--- a/test/Transforms/SimplifyLibCalls/StrNCat.ll
+++ b/test/Transforms/SimplifyLibCalls/StrNCat.ll
@@ -1,8 +1,8 @@
 ; Test that the StrNCatOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*strncat}
+; RUN:   not grep "call.*strncat"
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   grep {puts.*%arg1}
+; RUN:   grep "puts.*%arg1"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/StrNCpy.ll b/test/Transforms/SimplifyLibCalls/StrNCpy.ll
index c8af3ca..4e47b31 100644
--- a/test/Transforms/SimplifyLibCalls/StrNCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/StrNCpy.ll
@@ -1,6 +1,6 @@
 ; Test that the StrNCpyOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*strncpy}
+; RUN:   not grep "call.*strncpy"
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll
index e2b5683..aef4733 100644
--- a/test/Transforms/SimplifyLibCalls/ToAscii.ll
+++ b/test/Transforms/SimplifyLibCalls/ToAscii.ll
@@ -1,6 +1,6 @@
 ; Test that the ToAsciiOptimizer works correctly
 ; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*toascii}
+; RUN:   not grep "call.*toascii"
 
 declare i32 @toascii(i32)
 
diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll
index 6fbe0b9..3934a5b 100644
--- a/test/Transforms/SimplifyLibCalls/abs.ll
+++ b/test/Transforms/SimplifyLibCalls/abs.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | grep {select i1 %ispos}
+; RUN: opt < %s -simplify-libcalls -S | grep "select i1 %ispos"
 ; PR2337
 
 define i32 @test(i32 %x) {
diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll
index 2f5d910..a592775 100644
--- a/test/Transforms/SimplifyLibCalls/exp2.ll
+++ b/test/Transforms/SimplifyLibCalls/exp2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | grep {call.*ldexp} | count 4
+; RUN: opt < %s -simplify-libcalls -S | grep "call.*ldexp" | count 4
 ; rdar://5852514
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/SimplifyLibCalls/memmove.ll b/test/Transforms/SimplifyLibCalls/memmove.ll
index c0c0050..5aaeeeb 100644
--- a/test/Transforms/SimplifyLibCalls/memmove.ll
+++ b/test/Transforms/SimplifyLibCalls/memmove.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memmove}
+; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memmove"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/Transforms/SimplifyLibCalls/memset-64.ll b/test/Transforms/SimplifyLibCalls/memset-64.ll
index fb752c4..92412de 100644
--- a/test/Transforms/SimplifyLibCalls/memset-64.ll
+++ b/test/Transforms/SimplifyLibCalls/memset-64.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset}
+; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset"
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-pc-linux-gnu"
 
diff --git a/test/Transforms/SimplifyLibCalls/memset.ll b/test/Transforms/SimplifyLibCalls/memset.ll
index 0aede06..853215a 100644
--- a/test/Transforms/SimplifyLibCalls/memset.ll
+++ b/test/Transforms/SimplifyLibCalls/memset.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset}
+; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll
index f8364f7..f0964e7 100644
--- a/test/Transforms/SimplifyLibCalls/pow2.ll
+++ b/test/Transforms/SimplifyLibCalls/pow2.ll
@@ -1,6 +1,6 @@
 ; Testcase for calls to the standard C "pow" function
 ;
-; RUN: opt < %s -simplify-libcalls -S | not grep {call .pow}
+; RUN: opt < %s -simplify-libcalls -S | not grep "call .pow"
 
 
 declare double @pow(double, double)
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 4c531d8..1d0b6b5 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -36,3 +36,29 @@ true:
 false:
   ret i32 0
 }
+
+; Sink to the nearest post-dominator
+
+;      CHECK: @diamond
+;      CHECK: X:
+; CHECK-NEXT: phi
+; CHECK-NEXT: mul nsw
+; CHECK-NEXT: sub
+
+define i32 @diamond(i32 %a, i32 %b, i32 %c) {
+  %1 = mul nsw i32 %c, %b
+  %2 = icmp sgt i32 %a, 0
+  br i1 %2, label %B0, label %B1
+
+B0:                                       ; preds = %0
+  br label %X
+
+B1:                                      ; preds = %0
+  br label %X
+
+X:                                     ; preds = %5, %3
+  %.01 = phi i32 [ %c, %B0 ], [ %a, %B1 ]
+  %R = sub i32 %1, %.01
+  ret i32 %R
+}
+
diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll
index 0c140ad..5b5dbcc 100644
--- a/test/Transforms/TailCallElim/ackermann.ll
+++ b/test/Transforms/TailCallElim/ackermann.ll
@@ -1,5 +1,5 @@
 ; This function contains two tail calls, which should be eliminated
-; RUN: opt < %s -tailcallelim -stats -disable-output |& grep {2 tailcallelim}
+; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim"
 
 define i32 @Ack(i32 %M.1, i32 %N.1) {
 entry:
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index 5cc92e1..e4f8b48 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep {call i32 @foo}
+; RUN:    grep "call i32 @foo"
 
 declare void @bar(i32*)
 
diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll
index 9363880..42ac2f9 100644
--- a/test/Transforms/TailCallElim/dup_tail.ll
+++ b/test/Transforms/TailCallElim/dup_tail.ll
@@ -1,5 +1,5 @@
 ; Duplicate the return into if.end to enable TCE.
-; RUN: opt %s -tailcallelim -stats -disable-output |& grep {Number of return duplicated}
+; RUN: opt %s -tailcallelim -stats -disable-output 2>&1 | grep "Number of return duplicated"
 
 define i32 @fib(i32 %n) nounwind ssp {
 entry:
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 3dddb01..3d01d17 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep {tail call void @foo}
+; RUN:    grep "tail call void @foo"
 
 
 declare void @foo()
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index 03e99bc..7853d7b 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output |& not grep tailduplicate
+; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate
 ; XFAIL: *
 
 define i32 @foo(i32 %l) nounwind  {
diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll
index 4c1ddd1..af46839 100644
--- a/test/Verifier/2002-04-13-RetTypes.ll
+++ b/test/Verifier/2002-04-13-RetTypes.ll
@@ -1,6 +1,6 @@
-; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'i32'}
+; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'i32'"
 
-; Verify the the operand type of the ret instructions in a function match the
+; Verify the operand type of the ret instructions in a function match the
 ; delcared return type of the function they live in.
 ;
 
diff --git a/test/Verifier/2002-11-05-GetelementptrPointers.ll b/test/Verifier/2002-11-05-GetelementptrPointers.ll
index 1f71387..108ae5f 100644
--- a/test/Verifier/2002-11-05-GetelementptrPointers.ll
+++ b/test/Verifier/2002-11-05-GetelementptrPointers.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {invalid getelementptr indices}
+; RUN: not llvm-as < %s 2>&1 | grep "invalid getelementptr indices"
 
 ; This testcase is invalid because we are indexing into a pointer that is 
 ; contained WITHIN a structure.
diff --git a/test/Verifier/2006-07-11-StoreStruct.ll b/test/Verifier/2006-07-11-StoreStruct.ll
index 80ab122..65b229d 100644
--- a/test/Verifier/2006-07-11-StoreStruct.ll
+++ b/test/Verifier/2006-07-11-StoreStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s |& not grep {Instruction operands must be first-class}
+; RUN: llvm-as < %s 2>&1 | not grep "Instruction operands must be first-class"
 
 ; This previously was for PR826, but structs are now first-class so
 ; the following is now valid.
diff --git a/test/Verifier/2006-10-15-AddrLabel.ll b/test/Verifier/2006-10-15-AddrLabel.ll
index 0b73b47..c8fedb5 100644
--- a/test/Verifier/2006-10-15-AddrLabel.ll
+++ b/test/Verifier/2006-10-15-AddrLabel.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s > /dev/null |& grep {basic block pointers are invalid}
+; RUN: not llvm-as < %s > /dev/null 2> %t
+; RUN: grep "basic block pointers are invalid" %t
 
 define i32 @main() {
          %foo  = call i8* %llvm.stacksave()
diff --git a/test/Verifier/2006-12-12-IntrinsicDefine.ll b/test/Verifier/2006-12-12-IntrinsicDefine.ll
index 8d09b51..6e7468c 100644
--- a/test/Verifier/2006-12-12-IntrinsicDefine.ll
+++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {llvm intrinsics cannot be defined}
+; RUN: not llvm-as < %s 2>&1 | grep "llvm intrinsics cannot be defined"
 ; PR1047
 
 define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) {
diff --git a/test/Verifier/2008-03-01-AllocaSized.ll b/test/Verifier/2008-03-01-AllocaSized.ll
index 079a75d..51258be 100644
--- a/test/Verifier/2008-03-01-AllocaSized.ll
+++ b/test/Verifier/2008-03-01-AllocaSized.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null |& grep {Cannot allocate unsized type}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Cannot allocate unsized type"
 ; PR2113
 
 define void @test() {
diff --git a/test/Verifier/2008-08-22-MemCpyAlignment.ll b/test/Verifier/2008-08-22-MemCpyAlignment.ll
index aaf69ae..c6d5afd 100644
--- a/test/Verifier/2008-08-22-MemCpyAlignment.ll
+++ b/test/Verifier/2008-08-22-MemCpyAlignment.ll
@@ -1,11 +1,11 @@
-; RUN: not llvm-as %s -o /dev/null |& grep {alignment argument of memory intrinsics must be a constant int}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "alignment argument of memory intrinsics must be a constant int"
 ; PR2318
 
 define void @x(i8* %a, i8* %src, i64 %len, i32 %align) nounwind  {
 entry:
-        tail call void @llvm.memcpy.i64( i8* %a, i8* %src, i64 %len, i32 %align) nounwind 
+        tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %src, i64 %len, i32 %align, i1 false) nounwind 
         ret void
 }
 
-declare void @llvm.memcpy.i64( i8* %a, i8* %src, i64 %len, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %src, i64 %len, i32, i1)
 
diff --git a/test/Verifier/2008-11-15-RetVoid.ll b/test/Verifier/2008-11-15-RetVoid.ll
index aaef703..42503fa 100644
--- a/test/Verifier/2008-11-15-RetVoid.ll
+++ b/test/Verifier/2008-11-15-RetVoid.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'void'}
+; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'void'"
 
 define void @foo() {
   ret i32 0
diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll
index bf5563d..3136c61 100644
--- a/test/Verifier/2010-08-07-PointerIntrinsic.ll
+++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-as < %s 2> %t
-; RUN: grep {Broken module} %t
+; RUN: grep "Broken module" %t
 ; PR7316
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
diff --git a/test/Verifier/AmbiguousPhi.ll b/test/Verifier/AmbiguousPhi.ll
index 9a72530..f31bc10 100644
--- a/test/Verifier/AmbiguousPhi.ll
+++ b/test/Verifier/AmbiguousPhi.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {multiple entries for the same basic block}
+; RUN: not llvm-as < %s 2>&1 | grep "multiple entries for the same basic block"
 
 
 
diff --git a/test/Verifier/PhiGrouping.ll b/test/Verifier/PhiGrouping.ll
index dc529dc..7b42fd2 100644
--- a/test/Verifier/PhiGrouping.ll
+++ b/test/Verifier/PhiGrouping.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {PHI nodes not grouped at top}
+; RUN: not llvm-as < %s 2>&1 | grep "PHI nodes not grouped at top"
 
 
 
diff --git a/test/Verifier/SelfReferential.ll b/test/Verifier/SelfReferential.ll
index 70154b7..c24c0eb 100644
--- a/test/Verifier/SelfReferential.ll
+++ b/test/Verifier/SelfReferential.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null |& grep {Only PHI nodes may reference their own value}
+; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Only PHI nodes may reference their own value"
 
 ; Test that self referential instructions are not allowed
 
diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll
index fc5ef1c..a52e796 100644
--- a/test/Verifier/aliasing-chain.ll
+++ b/test/Verifier/aliasing-chain.ll
@@ -1,4 +1,4 @@
-; RUN:  not llvm-as %s -o /dev/null |& grep {Aliasing chain should end with function or global variable}
+; RUN:  not llvm-as %s -o /dev/null 2>&1 | grep "Aliasing chain should end with function or global variable"
 
 ; Test that alising chain does not create a cycle
 
diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll
index 48cd061..66c5396 100644
--- a/test/Verifier/cttz-undef-arg.ll
+++ b/test/Verifier/cttz-undef-arg.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
 
 declare i32 @llvm.ctlz.i32(i32, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
diff --git a/test/Verifier/dominates.ll b/test/Verifier/dominates.ll
new file mode 100644
index 0000000..17e2c33
--- /dev/null
+++ b/test/Verifier/dominates.ll
@@ -0,0 +1,57 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define i32 @f1(i32 %x) {
+       %y = add i32 %z, 1
+       %z = add i32 %x, 1
+       ret i32 %y
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT:  %z = add i32 %x, 1
+; CHECK-NEXT:  %y = add i32 %z, 1
+}
+
+declare i32 @g()
+define void @f2(i32 %x) {
+bb0:
+  %y1 = invoke i32 @g() to label %bb1 unwind label %bb2
+bb1:
+  ret void
+bb2:
+  %y2 = phi i32 [%y1, %bb0]
+  %y3 = landingpad i32 personality i32 ()* @g
+          cleanup
+  ret void
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT:  %y1 = invoke i32 @g()
+; CHECK-NEXT:        to label %bb1 unwind label %bb2
+; CHECK-NEXT:  %y2 = phi i32 [ %y1, %bb0 ]
+}
+
+define void @f3(i32 %x) {
+bb0:
+  %y1 = invoke i32 @g() to label %bb1 unwind label %bb2
+bb1:
+  ret void
+bb2:
+  %y2 = landingpad i32 personality i32 ()* @g
+          cleanup
+  br label %bb3
+bb3:
+  %y3 = phi i32 [%y1, %bb2]
+  ret void
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT:  %y1 = invoke i32 @g()
+; CHECK-NEXT:          to label %bb1 unwind label %bb2
+; CHECK-NEXT:  %y3 = phi i32 [ %y1, %bb2 ]
+}
+
+define void @f4(i32 %x) {
+bb0:
+  br label %bb1
+bb1:
+  %y3 = phi i32 [%y1, %bb0]
+  %y1 = add i32 %x, 1
+  ret void
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT:  %y1 = add i32 %x, 1
+; CHECK-NEXT:  %y3 = phi i32 [ %y1, %bb0 ]
+}
diff --git a/test/Verifier/fpmath.ll b/test/Verifier/fpmath.ll
index b764a63..7002c5c 100644
--- a/test/Verifier/fpmath.ll
+++ b/test/Verifier/fpmath.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& FileCheck %s
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 define void @fpmath1(i32 %i, float %f, <2 x float> %g) {
   %s = add i32 %i, %i, !fpmath !0
diff --git a/test/Verifier/invoke-1.ll b/test/Verifier/invoke-1.ll
deleted file mode 100644
index 427abe0..0000000
--- a/test/Verifier/invoke-1.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not llvm-as < %s |& grep {not verify as correct}
-; PR1042
-
-define i32 @foo() {
-	%A = invoke i32 @foo( )
-			to label %L unwind label %L		; <i32> [#uses=1]
-L:		; preds = %0, %0
-	ret i32 %A
-}
-
diff --git a/test/Verifier/invoke-2.ll b/test/Verifier/invoke-2.ll
deleted file mode 100644
index 0145935..0000000
--- a/test/Verifier/invoke-2.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: not llvm-as %s |& grep {not verify as correct}
-; PR1042
-
-define i32 @foo() {
-	br i1 false, label %L1, label %L2
-L1:		; preds = %0
-	%A = invoke i32 @foo( )
-			to label %L unwind label %L		; <i32> [#uses=1]
-L2:		; preds = %0
-	br label %L
-L:		; preds = %L2, %L1, %L1
-	ret i32 %A
-}
-
diff --git a/test/Verifier/invoke.ll b/test/Verifier/invoke.ll
new file mode 100644
index 0000000..a48f9b6
--- /dev/null
+++ b/test/Verifier/invoke.ll
@@ -0,0 +1,80 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; PR1042
+define i32 @foo() {
+; CHECK: The unwind destination does not have a landingpad instruction
+	%A = invoke i32 @foo( )
+			to label %L unwind label %L		; <i32> [#uses=1]
+L:		; preds = %0, %0
+	ret i32 %A
+}
+
+; PR1042
+define i32 @bar() {
+	br i1 false, label %L1, label %L2
+L1:		; preds = %0
+	%A = invoke i32 @bar( )
+			to label %L unwind label %L		; <i32> [#uses=1]
+L2:		; preds = %0
+	br label %L
+L:		; preds = %L2, %L1, %L1
+; CHECK: The unwind destination does not have a landingpad instruction
+; CHECK: Instruction does not dominate all uses
+	ret i32 %A
+}
+
+
+declare i32 @__gxx_personality_v0(...)
+declare void @llvm.donothing()
+declare void @llvm.trap()
+declare i8 @llvm.expect.i8(i8,i8)
+declare i32 @fn(i8 (i8, i8)*)
+
+define void @f1() {
+entry:
+; OK
+  invoke void @llvm.donothing()
+  to label %conta unwind label %contb
+
+conta:
+  ret void
+
+contb:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
+
+define i8 @f2() {
+entry:
+; CHECK: Cannot invoke an intrinsinc other than donothing
+  invoke void @llvm.trap()
+  to label %cont unwind label %lpad
+
+cont:
+  ret i8 3
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret i8 2
+}
+
+define i32 @f3() {
+entry:
+; CHECK: Cannot take the address of an intrinsic
+  %call = call i32 @fn(i8 (i8, i8)* @llvm.expect.i8)
+  ret i32 %call
+}
+
+define void @f4() {
+entry:
+  invoke void @llvm.donothing()
+  to label %cont unwind label %cont
+
+cont:
+; CHECK: Block containing LandingPadInst must be jumped to only by the unwind edge of an invoke.
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll
index 611933a..b6a75d1 100644
--- a/test/Verifier/range-1.ll
+++ b/test/Verifier/range-1.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
 
 define void @f1(i8* %x) {
 entry:
@@ -76,3 +76,67 @@ entry:
 }
 !8 = metadata !{i8 0, i8 0}
 ; CHECK: Range must not be empty!
+
+define i8 @f10(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !9
+  ret i8 %y
+}
+!9 = metadata !{i8 0, i8 2, i8 1, i8 3}
+; CHECK: Intervals are overlapping
+
+define i8 @f11(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !10
+  ret i8 %y
+}
+!10 = metadata !{i8 0, i8 2, i8 2, i8 3}
+; CHECK: Intervals are contiguous
+
+define i8 @f12(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !11
+  ret i8 %y
+}
+!11 = metadata !{i8 1, i8 2, i8 -1, i8 0}
+; CHECK: Intervals are not in order
+
+define i8 @f13(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !12
+  ret i8 %y
+}
+!12 = metadata !{i8 1, i8 3, i8 5, i8 1}
+; CHECK: Intervals are contiguous
+
+define i8 @f14(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !13
+  ret i8 %y
+}
+!13 = metadata !{i8 1, i8 3, i8 5, i8 2}
+; CHECK: Intervals are overlapping
+
+define i8 @f15(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !14
+  ret i8 %y
+}
+!14 = metadata !{i8 10, i8 1, i8 12, i8 13}
+; CHECK: Intervals are overlapping
+
+define i8 @f16(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !16
+  ret i8 %y
+}
+!16 = metadata !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 2}
+; CHECK: Intervals are overlapping
+
+define i8 @f17(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !17
+  ret i8 %y
+}
+!17 = metadata !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 1}
+; CHECK: Intervals are contiguous
diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll
index ef542c8..8d85d19 100644
--- a/test/Verifier/range-2.ll
+++ b/test/Verifier/range-2.ll
@@ -20,3 +20,17 @@ entry:
   ret i8 %y
 }
 !2 = metadata !{i8 1, i8 3, i8 5, i8 42}
+
+define i8 @f4(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !3
+  ret i8 %y
+}
+!3 = metadata !{i8 -1, i8 0, i8 1, i8 2}
+
+define i8 @f5(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !4
+  ret i8 %y
+}
+!4 = metadata !{i8 -1, i8 0, i8 1, i8 -2}
diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data
index b306bdb..028f41b 100644
--- a/test/YAMLParser/spec-05-02-utf8.data
+++ b/test/YAMLParser/spec-05-02-utf8.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 
 # Invalid use of BOM
 # inside a
diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data
index 6788f0b..bab2c1b 100644
--- a/test/YAMLParser/spec-05-10.data
+++ b/test/YAMLParser/spec-05-10.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 
 commercial-at: @text
 grave-accent: `text
diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data
index 7dadff7..eedfc08 100644
--- a/test/YAMLParser/spec-05-12.data
+++ b/test/YAMLParser/spec-05-12.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # We don't currently reject tabs as indentation.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data
index cd8421a..27dbd83 100644
--- a/test/YAMLParser/spec-05-15.data
+++ b/test/YAMLParser/spec-05-15.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 
 Bad escapes:
   "\c
diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data
index 7ca9483..c4a5299 100644
--- a/test/YAMLParser/spec-07-03.data
+++ b/test/YAMLParser/spec-07-03.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 
 %YAML 1.1
 %YAML 1.1
diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data
index 279b54a..f7cff3a 100644
--- a/test/YAMLParser/spec-07-05.data
+++ b/test/YAMLParser/spec-07-05.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # We don't currently parse TAG directives.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data
index f13538b..73c493d 100644
--- a/test/YAMLParser/spec-08-04.data
+++ b/test/YAMLParser/spec-08-04.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # We don't currently look at the content of literal tags.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data
index a811bfd..9844f53 100644
--- a/test/YAMLParser/spec-08-06.data
+++ b/test/YAMLParser/spec-08-06.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # We don't currently validate tags.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data
index f690378..9d8a58c 100644
--- a/test/YAMLParser/spec-09-02.data
+++ b/test/YAMLParser/spec-09-02.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # Indent trimming is not yet implemented.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data
index 890f6bf..a83fcd4 100644
--- a/test/YAMLParser/spec-09-14.data
+++ b/test/YAMLParser/spec-09-14.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # Not quite sure why this doesn't fail.
 # XFAIL: *
diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data
index 2bcc283..6eb7917 100644
--- a/test/YAMLParser/spec-09-21.data
+++ b/test/YAMLParser/spec-09-21.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 
 - |
 
diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data
index 5b981e9..53faeb9 100644
--- a/test/YAMLParser/spec-10-08.data
+++ b/test/YAMLParser/spec-10-08.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s |& FileCheck %s
+# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
 #
 # This fails because even without a key token, some contexts (in this case flow
 # maps) allow implicit null keys, which mix with this in weird ways.
diff --git a/test/lit.cfg b/test/lit.cfg
index d74bc7b..6f44bb3 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -9,8 +9,22 @@ import re
 # name: The name of this test suite.
 config.name = 'LLVM'
 
+# Tweak PATH for Win32 to decide to use bash.exe or not.
+if sys.platform in ['win32']:
+    # Seek sane tools in directories and set to $PATH.
+    path = getattr(config, 'lit_tools_dir', None)
+    path = lit.getToolsPath(path,
+                            config.environment['PATH'],
+                            ['cmp.exe', 'grep.exe', 'sed.exe'])
+    if path is not None:
+        path = os.path.pathsep.join((path,
+                                     config.environment['PATH']))
+        config.environment['PATH'] = path
+
 # testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.TclTest()
+execute_external = (not sys.platform in ['win32']
+                    or lit.getBashPath() not in [None, ""])
+config.test_format = lit.formats.ShTest(execute_external)
 
 # To ignore test output on stderr so it doesn't trigger failures uncomment this:
 #config.test_format = lit.formats.TclTest(ignoreStdErr=True)
@@ -19,21 +33,14 @@ config.test_format = lit.formats.TclTest()
 # set by on_clone().
 config.suffixes = []
 
+# excludes: A list of directories to exclude from the testsuite. The 'Inputs'
+# subdirectories contain auxiliary inputs for various tests in their parent
+# directories.
+config.excludes = ['Inputs']
+
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
 
-# Tweak PATH for Win32
-if sys.platform in ['win32']:
-    # Seek sane tools in directories and set to $PATH.
-    path = getattr(config, 'lit_tools_dir', None)
-    path = lit.getToolsPath(path,
-                            config.environment['PATH'],
-                            ['cmp.exe', 'grep.exe', 'sed.exe'])
-    if path is not None:
-        path = os.path.pathsep.join((path,
-                                     config.environment['PATH']))
-        config.environment['PATH'] = path
-
 # test_exec_root: The root path where tests should be run.
 llvm_obj_root = getattr(config, 'llvm_obj_root', None)
 if llvm_obj_root is not None:
@@ -132,18 +139,6 @@ if config.test_exec_root is None:
 
 ###
 
-# Load site data from DejaGNU's site.exp.
-import re
-site_exp = {}
-# FIXME: Implement lit.site.cfg.
-for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
-    m = re.match('set ([^ ]+) "(.*)"', line)
-    if m:
-        site_exp[m.group(1)] = m.group(2)
-
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
 # When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
 # triple so we can check it with XFAIL and XTARGET.
 config.target_triple += lit.valgrindTriple
@@ -164,9 +159,10 @@ if jit_impl_cfg == 'mcjit':
 else:
   config.substitutions.append( ('%lli', 'lli') )
 
-# Add substitutions.
-for sub in ['link', 'shlibext', 'ocamlopt', 'llvmshlibdir']:
-    config.substitutions.append(('%' + sub, site_exp[sub]))
+# Add site-specific substitutions.
+config.substitutions.append( ('%ocamlopt', config.ocamlopt_executable) )
+config.substitutions.append( ('%llvmshlibdir', config.llvm_shlib_dir) )
+config.substitutions.append( ('%shlibext', config.llvm_shlib_ext) )
 
 # For each occurrence of an llvm tool name as its own word, replace it
 # with the full path to the build directory holding that tool.  This
@@ -187,12 +183,12 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
                 r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
                 r"\bllvm-cov\b",        r"\bllvm-diff\b",
                 r"\bllvm-dis\b",        r"\bllvm-dwarfdump\b",
-                r"\bllvm-extract\b",    r"\bllvm-ld\b",
+                r"\bllvm-extract\b",
                 r"\bllvm-link\b",       r"\bllvm-mc\b",
                 r"\bllvm-nm\b",         r"\bllvm-objdump\b",
                 r"\bllvm-prof\b",       r"\bllvm-ranlib\b",
                 r"\bllvm-rtdyld\b",     r"\bllvm-shlib\b",
-                r"\bllvm-size\b",       r"\bllvm-stub\b",
+                r"\bllvm-size\b",
                 # Don't match '-llvmc'.
                 r"(?<!-)\bllvmc\b",     r"\blto\b",
                                         # Don't match '.opt', '-opt',
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 8b81186..178b22f 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -1,14 +1,20 @@
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
+config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvm_shlib_dir = "@SHLIBDIR@"
+config.llvm_shlib_ext = "@SHLIBEXT@"
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
+config.ocamlopt_executable = "@OCAMLOPT@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
 config.targets_to_build = "@TARGETS_TO_BUILD@"
 config.llvm_bindings = "@LLVM_BINDINGS@"
+config.host_os = "@HOST_OS@"
+config.host_arch = "@HOST_ARCH@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/test/site.exp.in b/test/site.exp.in
deleted file mode 100644
index cfb2eac..0000000
--- a/test/site.exp.in
+++ /dev/null
@@ -1,16 +0,0 @@
-## Autogenerated by LLVM configuration.
-# Do not edit!
-set target_triplet "@TARGET_TRIPLE@"
-set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
-set llvmshlibdir "@SHLIBDIR@"
-set llvm_bindings "@LLVM_BINDINGS@"
-set srcroot "@LLVM_SOURCE_DIR@"
-set objroot "@LLVM_BINARY_DIR@"
-set srcdir "@LLVM_SOURCE_DIR@"
-set objdir "@LLVM_BINARY_DIR@"
-set link "@TEST_LINK_CMD@"
-set shlibext "@SHLIBEXT@"
-set ocamlopt "@OCAMLOPT@"
-set valgrind "@VALGRIND@"
-set grep "@GREP@"
-set gas "@AS@"