133 files changed, 2006 insertions, 384 deletions
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2e4cb1f..cb90bf6 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -4,7 +4,9 @@
 ; it makes a ton of annoying overlapping live ranges.  This code should not
 ; cause spills!
 ;
-; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled
+; RUN: llc < %s -march=x86 -stats 2>&1 | FileCheck %s
+
+; CHECK-NOT: spilled
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index c5c74d1..c4b08a3 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,7 +1,8 @@
 ; PR850
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
-; RUN: grep "movl 4(%eax),%ebp" %t
-; RUN: grep "movl 0(%eax), %ebx" %t
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | FileCheck %s
+
+; CHECK: {{movl 4[(]%eax[)],%ebp}}
+; CHECK: {{movl 0[(]%eax[)], %ebx}}
 
 define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) {
 	%tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint  $$0x80\0Apop  %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i )		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
index ea2e6db..ba83a8d 100644
--- a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 | grep test.*1
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR1016
 
+; CHECK: {{test.*1}}
+
 define i32 @test(i32 %A, i32 %B, i32 %C) {
         %a = trunc i32 %A to i1         ; <i1> [#uses=1]
         %D = select i1 %a, i32 %B, i32 %C               ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index 18b06dc..366f583 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -mcpu=yonah -march=x86 | \
-; RUN:   grep "cmpltsd %xmm0, %xmm0"
+; RUN: llc < %s -mcpu=yonah -march=x86 | FileCheck %s
+
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
+; CHECK: {{cmpltsd %xmm0, %xmm0}}
 
 define void @acoshf() {
 	%tmp19 = tail call <2 x double> asm sideeffect "pcmpeqd $0, $0 \0A\09 cmpltsd $0, $0", "=x,0,~{dirflag},~{fpsr},~{flags}"( <2 x double> zeroinitializer )		; <<2 x double>> [#uses=0]
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
index 7528129..648718c 100644
--- a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86-64 | not grep 4294967112
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; PR1348
 
+; CHECK-NOT: 4294967112
+
 	%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
 
 define i8* @md5_buffer(i8* %buffer, i64 %len, i8* %resblock) {
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index b27ef83..38fc5e1 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: punpckhwd
 
 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
 
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 660d4fe..5612d9e 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
+
+; CHECK: paddusw
+
 @R = external global x86_mmx          ; <x86_mmx*> [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) {
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 62624a7..4f7ae32 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep movl
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK-NOT: movl
 
 define zeroext i8 @t(i8 zeroext  %x, i8 zeroext  %y)   {
 	%tmp2 = add i8 %x, 2
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index d3120f3..82052b1 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+
+; CHECK: inc
+; CHECK-NOT: PTR
+; CHECK: {{$}}
 
 define signext   i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) {
 entry:
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index 56a109a..c467024 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -relocation-model=static | grep "foo str$"
+; RUN: llc < %s -relocation-model=static | FileCheck %s
 ; PR1761
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-pc-linux"
 @str = internal constant [12 x i8] c"init/main.c\00"		; <[12 x i8]*> [#uses=1]
 
+; CHECK: {{foo str$}}
+
 define i32 @unknown_bootoption() {
 entry:
 	tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
index 6997d53..e8c957b 100644
--- a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -o - | grep sinl
+; RUN: llc < %s -o - | FileCheck %s
+
+; CHECK: sinl
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index a52b365..b06b249 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s | grep "a:" | not grep ax
-; RUN: llc < %s | grep "b:" | not grep ax
+; RUN: llc < %s | FileCheck %s
 ; PR2078
 ; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
 ; allocated to the input/output register.
@@ -15,6 +14,10 @@ entry:
 	ret void
 }
 
+; CHECK: a:
+; CHECK-NOT: ax
+; CHECK: {{$}}
+
 define void @test2(i16* %block, i8* %pixels, i32 %line_size) nounwind  {
 entry:
 	%tmp1 = getelementptr i16* %block, i32 64		; <i16*> [#uses=1]
@@ -22,3 +25,6 @@ entry:
 	ret void
 }
 
+; CHECK: b:
+; CHECK-NOT: ax
+; CHECK: {{$}}
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index f8f317c..e7caa7a 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CHECK: testb
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 9ea34e2..5bec179 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,7 +1,9 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | FileCheck %s
 ; rdar://6608609
 
+; CHECK-NOT: commuted
+
 define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
 entry:
 	%tmp.i2 = bitcast <2 x double> %B to <2 x i64>		; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index f40fddc..cc1d73d 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: not grep and %t
-; RUN: not grep shr %t
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; rdar://6661955
 
+; CHECK-NOT: and
+; CHECK-NOT: shr
+
 @hello = internal constant [7 x i8] c"hello\0A\00"
 @world = internal constant [7 x i8] c"world\0A\00"
 
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 0607eda..679a65d 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,8 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | FileCheck %s
 ; XFAIL: *
 ; 69408 removed the opportunity for this optimization to work
 
+; CHECK: {{Number of modref unfolded}}
+
 	%struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
 	%struct.anon = type { [16 x i64] }
 @K512 = external constant [80 x i64], align 32		; <[80 x i64]*> [#uses=2]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 08bf9e3..d104c87 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic > %t2
-; RUN: grep "leaq.*TLSGD" %t2
-; RUN: grep "__tls_get_addr" %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic | FileCheck %s
 ; PR4004
 
+; CHECK: {{leaq.*TLSGD}}
+; CHECK: {{__tls_get_addr}}
+
 @i = thread_local global i32 15
 
 define i32 @f() {
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index 738b5fb..7468acb 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -relocation-model=static > %t
-; RUN: grep "1: ._pv_cpu_ops+8" %t
-; RUN: grep "2: ._G" %t
+; RUN: llc < %s -relocation-model=static | FileCheck %s
 ; PR4152
 
+; CHECK: {{1: ._pv_cpu_ops[+]8}}
+; CHECK: {{2: ._G}}
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 	%struct.pv_cpu_ops = type { i32, [2 x i32] }
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
index 94773d9..c990108 100644
--- a/test/CodeGen/X86/2009-05-23-available_externally.ll
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
 ; PR4253
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -9,6 +9,9 @@ entry:
 	ret i32 %call
 }
 
+; CHECK: foo
+; CHECK: {{atoi.+PLT}}
+
 define available_externally fastcc i32 @atoi(i8* %__nptr) nounwind readonly {
 entry:
 	%call = tail call i64 @strtol(i8* nocapture %__nptr, i8** null, i32 10) nounwind readonly		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 3076322..3061dc2 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | not grep movl
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | FileCheck %s
+
+; CHECK-NOT: movl
 
 define <8 x i8> @a(i8 zeroext %x) nounwind {
   %r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
index 2dc812d..748c5a8 100644
--- a/test/CodeGen/X86/2009-08-08-CastError.ll
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | grep movabsq
+; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | FileCheck %s
+
+; CHECK: movabsq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 8ab93fc..7650a5c 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -203,7 +203,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
 !6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 6519ca0..6510ff1 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -25,7 +25,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
 !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31,  metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
 !5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 4ea3bf0..ee00dba 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -27,7 +27,7 @@ entry:
 !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
index b22a391..b5679e6 100644
--- a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
+++ b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -1,4 +1,5 @@
-; RUN: llc -fast-isel -march=x86 < %s | grep %fs:
+; RUN: llc -fast-isel -march=x86 < %s | FileCheck %s
+; CHECK: %fs:
 
 define i32 @test1(i32 addrspace(257)* %arg) nounwind {
        %tmp = load i32 addrspace(257)* %arg
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index aaa562a..91711bb 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 31a6822..8719f73 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -19,7 +19,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 2355528..14fb3e4 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -73,7 +73,7 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 54d2b40..6d91109 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -96,7 +96,7 @@ while.body.i188:                                  ; preds = %for.end173.i, %if.e
 while.body85.i:                                   ; preds = %while.body85.i, %while.body.i188
   %aFreq.0518.i = phi i32 [ %add93.i, %while.body85.i ], [ 0, %while.body.i188 ]
   %inc87.i = add nsw i32 0, 1
-  %tmp91.i = load i32* undef, align 4, !tbaa !0
+  %tmp91.i = load i32* undef, align 4
   %add93.i = add nsw i32 %tmp91.i, %aFreq.0518.i
   %or.cond514.i = and i1 undef, false
   br i1 %or.cond514.i, label %while.body85.i, label %while.end.i
@@ -168,7 +168,3 @@ if.end85:                                         ; preds = %entry
 }
 
 declare void @fprintf(...) nounwind
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 832a8eb..501a810 100644
--- a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -109,7 +109,7 @@ bb49:                                             ; preds = %bb49, %bb48
   %tmp51 = add i32 %tmp50, undef
   %tmp52 = add i32 %tmp50, undef
   %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
-  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp54 = load i32* %tmp53, align 4
   %tmp55 = add i32 %tmp50, 1
   %tmp56 = icmp eq i32 %tmp55, %tmp8
   br i1 %tmp56, label %bb57, label %bb49
@@ -127,7 +127,7 @@ bb61:                                             ; preds = %bb61, %bb59
   %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
   %tmp63 = add i32 %tmp62, %tmp14
   %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
-  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  store i32 0, i32* %tmp64, align 4
   %tmp65 = add i32 %tmp62, 1
   %tmp66 = icmp eq i32 %tmp65, %tmp8
   br i1 %tmp66, label %bb67, label %bb61
@@ -149,7 +149,3 @@ declare void @Pjii(i32*, i32, i32) optsize
 declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
 
 declare void @OnOverFlow() noreturn optsize ssp align 2
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 9525653..9164eb9 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -18,7 +18,7 @@ define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double
 entry:
   call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4)
   %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
-  %0 = load i16* %type, align 2, !tbaa !8
+  %0 = load i16* %type, align 2
   %cmp = icmp eq i16 %0, 1
   br i1 %cmp, label %return, label %for.cond.preheader
 
@@ -45,7 +45,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
 !6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
-!8 = metadata !{metadata !"short", metadata !9}
-!9 = metadata !{metadata !"omnipotent char", metadata !10}
-!10 = metadata !{metadata !"Simple C/C++ TBAA"}
 !11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index 03b6bde..f0c7781 100644
--- a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define void @main() #0 {
 entry:
-  %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
+  %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
   %bitcast.i = extractelement <8 x float> %0, i32 0
   %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
   %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
@@ -17,7 +17,7 @@ entry:
   %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
   %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
   %vecext.i.i = extractelement <4 x float> %1, i32 0
-  store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
+  store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16
   unreachable
 }
 
@@ -26,6 +26,3 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll b/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll
new file mode 100644
index 0000000..9203417
--- /dev/null
+++ b/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86
+
+; Make sure llc doesn't crash on this shufflevector chain (<3 x i8> -> <2 x i8> -> <4 x i8>, then a <4 x i8> store).
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+define void @foo() {
+  %1 = shufflevector <3 x i8> undef, <3 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %2 = shufflevector <2 x i8> %1, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %3 = shufflevector <4 x i8> undef, <4 x i8> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  store <4 x i8> %3, <4 x i8>* undef
+  ret void
+}
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 227ef34..13a6444 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -27,7 +27,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 03d2e47..5fe08ed 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -119,8 +119,8 @@ entry:
 
 ; X64: test8:
 ; X64: addq
-; X64-NEXT: sbbq
-; X64-NEXT: testb
+; X64-NEXT: setb
+; X64: ret
 
 define i32 @test9(i32 %x, i32 %y) nounwind readnone {
   %cmp = icmp eq i32 %x, 10
diff --git a/test/CodeGen/X86/asm-invalid-register-class-crasher.ll b/test/CodeGen/X86/asm-invalid-register-class-crasher.ll
new file mode 100644
index 0000000..24e2284
--- /dev/null
+++ b/test/CodeGen/X86/asm-invalid-register-class-crasher.ll
@@ -0,0 +1,9 @@
+; RUN: not llc < %s -mtriple=i386-apple-darwin > %t 2>&1
+
+; Previously, this would assert in an assert build, but crash in a release build.
+; No FileCheck, just make sure we handle this gracefully (llc must exit with an error; output is captured in %t).
+define i64 @t1(i64* %p, i64 %val) #0 {
+entry:
+  %0 = tail call i64 asm sideeffect "xaddq $0, $1", "=q,*m,0,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %p, i64 %val)
+  ret i64 %0
+}
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
index 2a34e02..6237b66 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom 2>&1 | \
-; RUN:     grep "calll" | not grep "("
-; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 2>&1 | \
-; RUN:     grep "calll" | grep "*funcp"
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom  | \
+; RUN:     FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 | \
+; RUN:     FileCheck --check-prefix=CORE2 %s
+; ATOM: calll *{{%[a-z]+}}
+; CORE2: calll *funcp
 ;
 ; original source code built with clang -S -emit-llvm -M32 test32.c:
 ;
@@ -18,10 +20,6 @@
 ;     }
 ;   }
 ;
-; ModuleID = 'test32.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
-
 @sum = external global i32
 @a = common global i32 0, align 4
 @i = common global i32 0, align 4
@@ -74,4 +72,3 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
index bcfbd61..a196d81 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom 2>&1 | \
-; RUN:     grep "callq" | not grep "("
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 2>&1 | \
-; RUN:     grep "callq" | grep "*funcp"
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom  | \
+; RUN:    FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 | \
+; RUN:    FileCheck --check-prefix=CORE2 %s
+; ATOM: callq *{{%[a-z]+[0-9]*}}
+; CORE2: callq *funcp
 ;
 ; Original source code built with clang -S -emit-llvm -m64 test64.c:
 ;   int a, b, c, d, e, f, g, h, i, j, k, l, m, n;
@@ -19,9 +21,6 @@
 ;     }
 ;   }
 ;   
-; ModuleID = 'test64.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
 
 @sum = external global i32
 @a = common global i32 0, align 4
@@ -88,4 +87,3 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-fixup-lea1.ll b/test/CodeGen/X86/atom-fixup-lea1.ll
new file mode 100644
index 0000000..4651bf2
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea1.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl
+; CHECK-NEXT:leal
+; CHECK-NEXT:decl
+; CHECK-NEXT:jne
+
+; Test for the FixupLEAs pre-emit pass. An LEA should be substituted for the ADD
+; that increments the array pointer because it is within 5 instructions of the
+; corresponding load. The ADD precedes the load by following the loop back edge.
+
+; Original C code
+;int test(int n, int * array)
+;{
+;  int sum = 0;
+;  for(int i = 0; i < n; i++)
+;    sum += array[i];
+;  return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %array, i32 %i.06
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.05
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
new file mode 100644
index 0000000..1855ea1
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK:BB#5
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:movl
+
+
+; Test for fixup lea pre-emit pass. LEA instructions should be substituted for
+; ADD instructions which compute the address and index of the load because they
+; precede the load within 5 instructions. An LEA should also be substituted for
+; an ADD which computes part of the index because it precedes the index LEA
+; within 5 instructions; this substitution is referred to as backwards chaining.
+
+; Original C Code
+;struct node_t
+;{
+;  int k, m, n, p;
+;  int * array;
+;};
+
+;extern struct node_t getnode();
+
+;int test()
+;{
+;  int sum = 0;
+;  struct node_t n = getnode();
+;  if(n.array != 0 && n.p > 0 && n.k > 0 && n.n > 0 && n.m > 0) {
+;    sum = ((int*)((int)n.array + n.p) )[ n.k + n.m + n.n ];
+;  }
+;  return sum;
+;}
+
+%struct.node_t = type { i32, i32, i32, i32, i32* }
+
+define i32 @test() {
+entry:
+  %n = alloca %struct.node_t, align 4
+  call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n)
+  %array = getelementptr inbounds %struct.node_t* %n, i32 0, i32 4
+  %0 = load i32** %array, align 4
+  %cmp = icmp eq i32* %0, null
+  br i1 %cmp, label %if.end, label %land.lhs.true
+
+land.lhs.true:
+  %p = getelementptr inbounds %struct.node_t* %n, i32 0, i32 3
+  %1 = load i32* %p, align 4
+  %cmp1 = icmp sgt i32 %1, 0
+  br i1 %cmp1, label %land.lhs.true2, label %if.end
+
+land.lhs.true2:
+  %k = getelementptr inbounds %struct.node_t* %n, i32 0, i32 0
+  %2 = load i32* %k, align 4
+  %cmp3 = icmp sgt i32 %2, 0
+  br i1 %cmp3, label %land.lhs.true4, label %if.end
+
+land.lhs.true4:
+  %n5 = getelementptr inbounds %struct.node_t* %n, i32 0, i32 2
+  %3 = load i32* %n5, align 4
+  %cmp6 = icmp sgt i32 %3, 0
+  br i1 %cmp6, label %land.lhs.true7, label %if.end
+
+land.lhs.true7:
+  %m = getelementptr inbounds %struct.node_t* %n, i32 0, i32 1
+  %4 = load i32* %m, align 4
+  %cmp8 = icmp sgt i32 %4, 0
+  br i1 %cmp8, label %if.then, label %if.end
+
+if.then:
+  %add = add i32 %3, %2
+  %add12 = add i32 %add, %4
+  %5 = ptrtoint i32* %0 to i32
+  %add15 = add nsw i32 %1, %5
+  %6 = inttoptr i32 %add15 to i32*
+  %arrayidx = getelementptr inbounds i32* %6, i32 %add12
+  %7 = load i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  %sum.0 = phi i32 [ %7, %if.then ], [ 0, %land.lhs.true7 ], [ 0, %land.lhs.true4 ], [ 0, %land.lhs.true2 ], [ 0, %land.lhs.true ], [ 0, %entry ]
+  ret i32 %sum.0
+}
+
+declare void @getnode(%struct.node_t* sret, ...)
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
new file mode 100644
index 0000000..311b0b3
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl ([[reg:%[a-z]+]])
+; CHECK-NEXT: addl $4, [[reg]]
+
+; Test for the FixupLEAs pre-emit pass.
+; An LEA should NOT be substituted for the ADD instruction
+; that increments the array pointer if it is more than 5 instructions
+; away from the memory reference that uses it.
+
+; Original C code: clang -m32 -S -O2
+;int test(int n, int * array, int * m, int * array2)
+;{
+;  int i, j = 0;
+;  int sum = 0;
+;  for (i = 0, j = 0; i < n;) {
+;    ++i;
+;    *m += array2[j++];
+;    sum += array[i];
+;  }
+;  return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %.pre = load i32* %m, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
+  %sum.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+  %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
+  %inc1 = add nsw i32 %j.09, 1
+  %arrayidx = getelementptr inbounds i32* %array2, i32 %j.09
+  %1 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %1
+  store i32 %add, i32* %m, align 4
+  %arrayidx2 = getelementptr inbounds i32* %array, i32 %inc1
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %2, %sum.010
+  %exitcond = icmp eq i32 %inc1, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
index 0e7cf8c..05e630b 100644
--- a/test/CodeGen/X86/atomic-dagsched.ll
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -18,8 +18,8 @@ loop.cond:                                        ; preds = %test.exit, %entry
   br i1 %3, label %return, label %loop
 
 loop:                                             ; preds = %loop.cond
-  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
-  %5 = load i64* %4, align 8, !tbaa !3
+  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %5 = load i64* %4, align 8
   %vector.size.i = ashr i64 %5, 3
   %num.vector.wi.i = shl i64 %vector.size.i, 3
   %6 = icmp eq i64 %vector.size.i, 0
@@ -65,8 +65,8 @@ scalarIf.i:                                       ; preds = %vector_kernel_entry
   br i1 %18, label %test.exit, label %dim_0_pre_head.i
 
 dim_0_pre_head.i:                                 ; preds = %scalarIf.i
-  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
-  %20 = load i64* %19, align 8, !tbaa !3
+  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %20 = load i64* %19, align 8
   %21 = trunc i64 %20 to i32
   %22 = mul i64 %vector.size.i, 8
   br label %scalar_kernel_entry.i
@@ -76,10 +76,10 @@ scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry
   %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
   %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
   %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
-  %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4
+  %25 = load i32 addrspace(1)* %scevgep16, align 4
   %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
   %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
-  store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4
+  store i32 %21, i32 addrspace(1)* %scevgep15, align 4
   %asr.iv.next13 = add i64 %asr.iv12, 1
   %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
   br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i
@@ -97,12 +97,6 @@ return:                                           ; preds = %loop.cond
   ret void
 }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"long", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
-
 ; CHECK: test
 ; CHECK: decq
 ; CHECK-NOT: cmpxchgl
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 95854c7..64c4627 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -121,3 +121,13 @@ define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
   %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
   ret <16 x i16> %res
 }
+
+;;; Check that VMOVPQIto64rr generates the assembly string "vmovd".  Previously
+;;; an incorrect mnemonic of "movd" was printed for this instruction.
+; CHECK: VMOVPQIto64rr
+; CHECK: vmovd
+define i64 @VMOVPQIto64rr(<2 x i64> %a) {
+entry:
+  %vecext.i = extractelement <2 x i64> %a, i32 0
+  ret i64 %vecext.i
+}
diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll
new file mode 100644
index 0000000..d52ae52
--- /dev/null
+++ b/test/CodeGen/X86/avx-brcond.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+
+define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test1:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test6:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: vptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 5534712..271fb42 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -524,7 +524,7 @@ entry:
   br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
 
 entry.if.then_crit_edge:
-  %.pre14 = load i8* undef, align 1, !tbaa !0
+  %.pre14 = load i8* undef, align 1
   br label %if.then
 
 lor.lhs.false:
@@ -537,7 +537,7 @@ exit:
 if.then:
   %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
   %1 = and i8 %0, 1
-  store i8 %1, i8* undef, align 4, !tbaa !0
+  store i8 %1, i8* undef, align 4
   br label %if.end
 
 if.end:
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 44670c8..bc4032b 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -108,3 +108,150 @@ bb2:                                              ; preds = %entry, %bb1
   ret float %.0
 }
 
+declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
+declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
+
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test5:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test10:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test11:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test12:
+; CHECK: ptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 3bb9124..d69bfa6 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t
-; RUN: not grep InlineAsm %t
-; RUN: FileCheck %s < %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix CHK %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHK-NOT: InlineAsm
 
 ; CHECK: foo:
 ; CHECK: bswapq
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index 39a784de..e28923b 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -522,11 +522,8 @@ UnifiedReturnBlock:		; preds = %entry
 
 declare void @foo()
 
-; rdar://12755626
 define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind {
-; CHECK: invert
-; CHECK: btl %eax, %ecx
-; CHECK: setae
+; CHECK: btl
 entry:
   %neg = xor i32 %flags, -1
   %shl = shl i32 1, %flag
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 38cda4d..8753594 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep "call.*12345678"
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep "call.*12345678"
-; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | FileCheck -check-prefix X86STA %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | FileCheck -check-prefix X86PIC %s
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | FileCheck -check-prefix X86DYN %s
 
 ; Call to immediate is not safe on x86-64 unless we *know* that the
 ; call will be within 32-bits pcrel from the dest immediate.
 
-; RUN: llc < %s -march=x86-64 | grep "call.*\*%rax"
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix X64 %s
 
 ; PR3666
 ; PR3773
@@ -16,3 +16,8 @@ entry:
 	%0 = call i32 inttoptr (i32 12345678 to i32 (i32)*)(i32 0) nounwind		; <i32> [#uses=1]
 	ret i32 %0
 }
+
+; X86STA: {{call.*12345678}}
+; X86PIC-NOT: {{call.*12345678}}
+; X86DYN: {{call.*12345678}}
+; X64: {{call.*[*]%rax}}
diff --git a/test/CodeGen/X86/coalescer-identity.ll b/test/CodeGen/X86/coalescer-identity.ll
index 9c72ee6..1aac095 100644
--- a/test/CodeGen/X86/coalescer-identity.ll
+++ b/test/CodeGen/X86/coalescer-identity.ll
@@ -12,10 +12,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define void @func() nounwind uwtable ssp {
 for.body.lr.ph:
-  %0 = load i32* @g2, align 4, !tbaa !0
+  %0 = load i32* @g2, align 4
   %tobool6 = icmp eq i32 %0, 0
   %s.promoted = load i16* @s, align 2
-  %.pre = load i32* @g1, align 4, !tbaa !0
+  %.pre = load i32* @g1, align 4
   br i1 %tobool6, label %for.body.us, label %for.body
 
 for.body.us:                                      ; preds = %for.body.lr.ph, %for.inc.us
@@ -43,11 +43,11 @@ for.inc.us:                                       ; preds = %cond.end.us, %for.b
 cond.end.us:                                      ; preds = %if.then7.us, %cond.false.us
   %4 = phi i32 [ 0, %cond.false.us ], [ %1, %if.then7.us ]
   %cond.us = phi i32 [ 0, %cond.false.us ], [ %v.010.us, %if.then7.us ]
-  store i32 %cond.us, i32* @g0, align 4, !tbaa !0
+  store i32 %cond.us, i32* @g0, align 4
   br label %for.inc.us
 
 cond.false.us:                                    ; preds = %if.then7.us
-  store i32 0, i32* @g1, align 4, !tbaa !0
+  store i32 0, i32* @g1, align 4
   br label %cond.end.us
 
 if.then7.us:                                      ; preds = %for.body.us
@@ -76,7 +76,3 @@ for.end:                                          ; preds = %for.inc.us, %for.bo
   store i16 %dec12.lcssa, i16* @s, align 2
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/code_placement_align_all.ll b/test/CodeGen/X86/code_placement_align_all.ll
new file mode 100644
index 0000000..1e5e8f7
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_align_all.ll
@@ -0,0 +1,22 @@
+; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux -align-all-blocks=16 < %s | FileCheck %s
+
+;CHECK: foo
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: ret
+define i32 @foo(i32 %t, i32 %l) nounwind readnone ssp uwtable {
+  %1 = icmp eq i32 %t, 0
+  br i1 %1, label %4, label %2
+
+; <label>:2                                       ; preds = %0
+  %3 = add nsw i32 %t, 2
+  ret i32 %3
+
+; <label>:4                                       ; preds = %0
+  %5 = icmp eq i32 %l, 0
+  %. = select i1 %5, i32 0, i32 5
+  ret i32 %.
+}
+
+
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll
new file mode 100644
index 0000000..e8ee070
--- /dev/null
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Check that the CodeGenPrepare Pass
+; does not wrongly rewrite the address computed by Instruction %4
+; as [12 + Base:%this].
+
+; This test makes sure that:
+; - both the store and the first load instructions
+;   within basic block labeled 'if.then' are not removed. 
+; - the store instruction stores a value at address [60 + %this]
+; - the first load instruction loads a value at address [12 + %this]
+
+%class.A = type { %struct.B }
+%struct.B = type { %class.C, %class.D, %class.C, %class.D }
+%class.C = type { float, float, float }
+%class.D = type { [3 x %class.C] }
+
+define linkonce_odr void @foo(%class.A* nocapture %this, i32 %BoolValue) nounwind uwtable {
+entry:
+  %cmp = icmp eq i32 %BoolValue, 0
+  %address1 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 3
+  %address2 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 1
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:                                         ; preds = %entry
+  %0 = getelementptr inbounds %class.D* %address2, i64 0, i32 0, i64 0, i32 0
+  %1 = load float* %0, align 4 
+  %2 = getelementptr inbounds float* %0, i64 3
+  %3 = load float* %2, align 4 
+  %4 = getelementptr inbounds %class.D* %address1, i64 0, i32 0, i64 0, i32 0
+  store float %1, float* %4, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %if.else, %entry
+  ret void
+}
+
+; CHECK: foo:
+; CHECK: movss 12([[THIS:%[a-zA-Z0-9]+]]), [[REGISTER:%[a-zA-Z0-9]+]]
+; CHECK-NEXT: movss [[REGISTER]], 60([[THIS]])
+
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index d810cb1..7d5ca47 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | FileCheck %s
+
+; CHECK-NOT: movaps
 
 @a = external global <2 x i64>		; <<2 x i64>*> [#uses=1]
 
diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll
new file mode 100644
index 0000000..8c4fa27
--- /dev/null
+++ b/test/CodeGen/X86/compact-unwind.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -disable-cfi -disable-fp-elim -mtriple x86_64-apple-darwin11 | FileCheck %s
+
+%ty = type { i8* }
+
+@gv = external global i32
+
+; This is aligning the stack with a push of a random register.
+; CHECK: pushq %rax
+
+; Even though we can't encode %rax into the compact unwind, we still want to be
+; able to generate a compact unwind encoding in this particular case.
+;
+; CHECK: __LD,__compact_unwind
+; CHECK: _foo ## Range Start
+; CHECK: 16842753 ## Compact Unwind Encoding: 0x1010001
+
+define i8* @foo(i64 %size) {
+  %addr = alloca i64, align 8
+  %tmp20 = load i32* @gv, align 4
+  %tmp21 = call i32 @bar()
+  %tmp25 = load i64* %addr, align 8
+  %tmp26 = inttoptr i64 %tmp25 to %ty*
+  %tmp29 = getelementptr inbounds %ty* %tmp26, i64 0, i32 0
+  %tmp34 = load i8** %tmp29, align 8
+  %tmp35 = getelementptr inbounds i8* %tmp34, i64 %size
+  store i8* %tmp35, i8** %tmp29, align 8
+  ret i8* null
+}
+
+declare i32 @bar()
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
index be8de5e..d38ce91 100644
--- a/test/CodeGen/X86/compiler_used.ll
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
-; We should have a .no_dead_strip directive for Z but not for X/Y.
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
 
 @X = internal global i8 4
 @Y = internal global i32 123
@@ -7,3 +6,7 @@
 
 @llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
 @llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
+
+; CHECK-NOT: .no_dead_strip
+; CHECK: .no_dead_strip	_Z
+; CHECK-NOT: .no_dead_strip
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 6d21962..852b642 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -238,7 +238,7 @@ declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
 
 define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 {
 bb:
-  %tmp = load %t9** undef, align 4, !tbaa !0
+  %tmp = load %t9** undef, align 4
   %tmp2 = getelementptr inbounds %t9* %tmp, i32 0, i32 0
   %tmp3 = getelementptr inbounds %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
   br label %bb4
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
index aca06a2..719a526 100644
--- a/test/CodeGen/X86/dbg-byval-parameter.ll
+++ b/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -30,7 +30,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7}
 !6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
index aabc206..f72729c 100644
--- a/test/CodeGen/X86/dbg-const-int.ll
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -14,7 +14,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
index a9b8f1f..5c2e62b 100644
--- a/test/CodeGen/X86/dbg-const.ll
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -20,7 +20,7 @@ declare i32 @bar() nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
index 17d6457..cc612b2 100644
--- a/test/CodeGen/X86/dbg-i128-const.ll
+++ b/test/CodeGen/X86/dbg-i128-const.ll
@@ -19,7 +19,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{metadata !8, metadata !8, metadata !8}
 !8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll
index ff16318..c381cd7 100644
--- a/test/CodeGen/X86/dbg-large-unsigned-const.ll
+++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll
@@ -7,8 +7,8 @@ define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwi
 entry:
   tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13
   tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14
-  %tmp1 = load i64* %LHS, align 4, !dbg !15, !tbaa !17
-  %tmp3 = load i64* %RHS, align 4, !dbg !15, !tbaa !17
+  %tmp1 = load i64* %LHS, align 4, !dbg !15
+  %tmp3 = load i64* %RHS, align 4, !dbg !15
   %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15
   ret i1 %cmp, !dbg !15
 }
@@ -47,9 +47,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !14 = metadata !{i32 2, i32 49, metadata !1, null}
 !15 = metadata !{i32 3, i32 3, metadata !16, null}
 !16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{metadata !"long long", metadata !18}
-!18 = metadata !{metadata !"omnipotent char", metadata !19}
-!19 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !20 = metadata !{i32 6, i32 19, metadata !6, null}
 !21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 7, i32 10, metadata !23, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index baad6c0..30d0305 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -47,7 +47,7 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
 !5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
index 26bac2e..d1774cc 100644
--- a/test/CodeGen/X86/dbg-prolog-end.ll
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -35,7 +35,7 @@ entry:
 !llvm.dbg.cu = !{!0}
 !18 = metadata !{metadata !1, metadata !6}
 
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 6090185..b08d68a 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -14,7 +14,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
index fcbf64f..c63235e 100644
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -27,7 +27,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
 !5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
index 55be3b1..acc360e 100644
--- a/test/CodeGen/X86/dbg-value-isel.ll
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -82,7 +82,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
 !5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index 2a1916f..a6c3e13 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -49,7 +49,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 6766dbe..b068bbb 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -6,7 +6,7 @@ define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
 entry:
   tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
   %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
-  %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15
+  %tmp2 = load i32* %tmp1, align 4, !dbg !14
   tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
   %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
   %add = add nsw i32 %tmp2, 1, !dbg !19
@@ -21,7 +21,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -34,9 +34,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 5, i32 19, metadata !0, null}
 !14 = metadata !{i32 6, i32 14, metadata !12, null}
-!15 = metadata !{metadata !"int", metadata !16}
-!16 = metadata !{metadata !"omnipotent char", metadata !17}
-!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !18 = metadata !{i32 7, i32 2, metadata !12, null}
 !19 = metadata !{i32 8, i32 2, metadata !12, null}
 !20 = metadata !{metadata !0}
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index d591f94..5121ed1 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep "add	ESP, 8"
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; CHECK: add ESP, 8
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
index 9233d3f..21fae4a 100644
--- a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; RUN: llc -O0 -relocation-model=pic < %s | FileCheck %s
+; CHECK-NOT: call
 ; rdar://8396318
 
 ; Don't emit a PIC base register if no addresses are needed.
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index b3adb80..bbbaeb2 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: LCPI0_0(%rip)
+
 ; Make sure fast isel uses rip-relative addressing when required.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
new file mode 100644
index 0000000..45494f1
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {  ; signed i64 quotient; the checks below expect cqto followed by idivq
+entry:
+  %result = sdiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_sdiv64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_srem64(i64 %dividend, i64 %divisor) nounwind {  ; signed i64 remainder; the checks below expect cqto followed by idivq
+entry:
+  %result = srem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_srem64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_udiv64(i64 %dividend, i64 %divisor) nounwind {  ; unsigned i64 quotient; the checks below expect xorl followed by divq
+entry:
+  %result = udiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_udiv64:
+; CHECK: xorl
+; CHECK: divq
+
+define i64 @test_urem64(i64 %dividend, i64 %divisor) nounwind {  ; unsigned i64 remainder; the checks below expect xorl followed by divq
+entry:
+  %result = urem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_urem64:
+; CHECK: xorl
+; CHECK: divq
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
new file mode 100644
index 0000000..7aba7f7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {  ; signed i8 quotient; the checks below expect movsbw followed by idivb
+entry:
+  %result = sdiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_sdiv8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_srem8(i8 %dividend, i8 %divisor) nounwind {  ; signed i8 remainder; the checks below expect movsbw followed by idivb
+entry:
+  %result = srem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_srem8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_udiv8(i8 %dividend, i8 %divisor) nounwind {  ; unsigned i8 quotient; the checks below expect movzbw followed by divb
+entry:
+  %result = udiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_udiv8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i8 @test_urem8(i8 %dividend, i8 %divisor) nounwind {  ; unsigned i8 remainder; the checks below expect movzbw followed by divb
+entry:
+  %result = urem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_urem8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i16 @test_sdiv16(i16 %dividend, i16 %divisor) nounwind {  ; signed i16 quotient; the checks below expect cwtd followed by idivw
+entry:
+  %result = sdiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_sdiv16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_srem16(i16 %dividend, i16 %divisor) nounwind {  ; signed i16 remainder; the checks below expect cwtd followed by idivw
+entry:
+  %result = srem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_srem16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_udiv16(i16 %dividend, i16 %divisor) nounwind {  ; unsigned i16 quotient; the checks below expect xorl followed by divw
+entry:
+  %result = udiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_udiv16:
+; CHECK: xorl
+; CHECK: divw
+
+define i16 @test_urem16(i16 %dividend, i16 %divisor) nounwind {  ; unsigned i16 remainder; the checks below expect xorl followed by divw
+entry:
+  %result = urem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_urem16:
+; CHECK: xorl
+; CHECK: divw
+
+define i32 @test_sdiv32(i32 %dividend, i32 %divisor) nounwind {  ; signed i32 quotient; the checks below expect cltd followed by idivl
+entry:
+  %result = sdiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_sdiv32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_srem32(i32 %dividend, i32 %divisor) nounwind {  ; signed i32 remainder; the checks below expect cltd followed by idivl
+entry:
+  %result = srem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_srem32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_udiv32(i32 %dividend, i32 %divisor) nounwind {  ; unsigned i32 quotient; the checks below expect xorl followed by divl
+entry:
+  %result = udiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_udiv32:
+; CHECK: xorl
+; CHECK: divl
+
+define i32 @test_urem32(i32 %dividend, i32 %divisor) nounwind {  ; unsigned i32 remainder; the checks below expect xorl followed by divl
+entry:
+  %result = urem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_urem32:
+; CHECK: xorl
+; CHECK: divl
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index f42a4a2..67fdad2 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,5 +1,9 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | FileCheck --check-prefix=SSE2 %s
+
+; SSE2: xor
+; SSE2: xor
+; SSE2-NOT: xor
 
 ; CHECK: doo:
 ; CHECK: xor
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index cb2464e..de75095 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: _kill@GOTPCREL(%rip)
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 @f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index c3e527c..79ff79d4 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | FileCheck %s
+; CHECK-NOT: add
 ; PR4154
 
 ; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll
new file mode 100644
index 0000000..7ce7f67
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-unaligned-store.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i32 @test_store_32(i32* nocapture %addr, i32 %value) {  ; stores %value through %addr and returns it; the check below only looks for a ret
+entry:
+  store i32 %value, i32* %addr, align 1  ; align 1: only 1-byte alignment is promised for this 4-byte store
+  ret i32 %value
+}
+
+; CHECK: ret
+
+define i16 @test_store_16(i16* nocapture %addr, i16 %value) {  ; stores %value through %addr and returns it; the check below only looks for a ret
+entry:
+  store i16 %value, i16* %addr, align 1  ; align 1: only 1-byte alignment is promised for this 2-byte store
+  ret i16 %value
+}
+
+; CHECK: ret
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index 33b18bb..3569d36 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -7,3 +7,8 @@ define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
         ret void
 }
 
+define x86_fastcallcc i32 @"\01DoNotMangle"(i32 %a) {  ; fastcall function whose name starts with \01; presumably that prefix is what suppresses symbol decoration - confirm against the LLVM Mangler
+; CHECK: DoNotMangle:
+entry:
+  ret i32 %a  ; returns the argument unchanged
+}
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index d044a2a..e11cdd1 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; Exactly one line of output may contain "mov", and it must be the movsd.
+; CHECK-NOT: mov
+; CHECK: movsd
+; CHECK-NOT: mov
 
 define i32 @foo() nounwind {
 entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index f1204d6..e6828e4 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2
+; RUN: llc < %s -tailcallopt=false | FileCheck %s
+; CHECK: movl 8(%esp), %eax
+; CHECK: movl 8(%esp), %eax
+; CHECK-NOT: movl 8(%esp), %eax
+
 ; PR3122
 ; rdar://6400815
 
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index d457418..97814db 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | FileCheck %s
 
 	%struct.foo = type { [4 x i32] }
 
@@ -9,6 +9,8 @@ entry:
 	store i32 1, i32* %tmp3, align 8
         ret void
 }
+; CHECK: bar
+; CHECK: ret{{[^4]*$}}
 
 @dst = external global i32
 
@@ -21,3 +23,5 @@ define void @foo() nounwind {
         store i32 %tmp6, i32* @dst
         ret void
 }
+; CHECK: foo
+; CHECK: ret{{[^4]*$}}
diff --git a/test/CodeGen/X86/fastcc3struct.ll b/test/CodeGen/X86/fastcc3struct.ll
index 84f8ef6..98dc2f5 100644
--- a/test/CodeGen/X86/fastcc3struct.ll
+++ b/test/CodeGen/X86/fastcc3struct.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: grep "movl	.48, %ecx" %t
-; RUN: grep "movl	.24, %edx" %t
-; RUN: grep "movl	.12, %eax" %t
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: movl {{.}}12, %eax
+; CHECK: movl {{.}}24, %edx
+; CHECK: movl {{.}}48, %ecx
 
 %0 = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
index f1fcbcf..16e4786 100644
--- a/test/CodeGen/X86/fold-imm.ll
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 | grep inc
-; RUN: llc < %s -march=x86 | grep add | grep 4
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @test(i32 %X) nounwind {
 entry:
@@ -7,8 +6,16 @@ entry:
 	ret i32 %0
 }
 
+; CHECK: test
+; CHECK: inc
+; CHECK: ret
+
 define i32 @test2(i32 %X) nounwind {
 entry:
 	%0 = add i32 %X, 4
 	ret i32 %0
 }
+
+; CHECK: test2
+; CHECK: {{add.*4.*$}}
+; CHECK: ret
diff --git a/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
new file mode 100644
index 0000000..3468a45
--- /dev/null
+++ b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+define void @bar(i32 %argc) #0 {  ; uses attribute group #0, which sets "no-frame-pointer-elim"="true"; the checks expect a pushq %rbp after the bar: label
+; CHECK: bar:
+; CHECK: pushq %rbp
+entry:
+  %conv = sitofp i32 %argc to double  ; argc as a double
+  %mul = fmul double %conv, 3.792700e+01
+  %conv1 = fptrunc double %mul to float  ; first argument for @foo
+  %div = fdiv double 9.273700e+02, %conv
+  %conv3 = fptrunc double %div to float  ; second argument for @foo
+  tail call void @foo(float %conv1, float %conv3)
+  ret void
+}
+
+define void @qux(i32 %argc) #1 {  ; same body as @bar but attribute group #1 sets "no-frame-pointer-elim"="false"; the checks require that no pushq %rbp follows the qux: label
+; CHECK: qux:
+; CHECK-NOT: pushq %rbp
+entry:
+  %conv = sitofp i32 %argc to double  ; argc as a double
+  %mul = fmul double %conv, 3.792700e+01
+  %conv1 = fptrunc double %mul to float  ; first argument for @foo
+  %div = fdiv double 9.273700e+02, %conv
+  %conv3 = fptrunc double %div to float  ; second argument for @foo
+  tail call void @foo(float %conv1, float %conv3)
+  ret void
+}
+
+declare void @foo(float, float)
+
+attributes #0 = { "no-frame-pointer-elim"="true" }
+attributes #1 = { "no-frame-pointer-elim"="false" }
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index 62d8100..dc59c5a 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,7 +1,8 @@
 ;; Test that this FP immediate is stored in the constant pool as a float.
 
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
-; RUN:   grep ".long.1123418112"
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | FileCheck %s
+
+; CHECK: {{.long.1123418112}}
 
 define double @D() {
         ret double 1.230000e+02
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index a160ac6..72ea12f 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep fild | not grep ESP
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define double @short(i16* %P) {
         %V = load i16* %P               ; <i16> [#uses=1]
@@ -18,3 +18,9 @@ define double @long(i64* %P) {
         ret double %V2
 }
 
+; CHECK: long
+; CHECK: fild
+; CHECK-NOT: ESP
+; CHECK-NOT: esp
+; CHECK: {{$}}
+; CHECK: ret
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
index e0165fb..13046d8 100644
--- a/test/CodeGen/X86/long-setcc.ll
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -1,18 +1,31 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep shr | count 1
-; RUN: llc < %s -march=x86 | grep xor | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i1 @t1(i64 %x) nounwind {
 	%B = icmp slt i64 %x, 0
 	ret i1 %B
 }
 
+; CHECK: t1
+; CHECK: shrl
+; CHECK-NOT: shrl
+; CHECK: ret
+
 define i1 @t2(i64 %x) nounwind {
 	%tmp = icmp ult i64 %x, 4294967296
 	ret i1 %tmp
 }
 
+; CHECK: t2
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: ret
+
 define i1 @t3(i32 %x) nounwind {
 	%tmp = icmp ugt i32 %x, -1
 	ret i1 %tmp
 }
+
+; CHECK: t3
+; CHECK: xor
+; CHECK-NOT: xor
+; CHECK: ret
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
index 932141d..bbf8f01 100644
--- a/test/CodeGen/X86/lsr-normalization.ll
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep div | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; rdar://8168938
 
 ; This testcase involves SCEV normalization with the exit value from
@@ -6,6 +6,9 @@
 ; loop. The expression should be properly normalized and simplified,
 ; and require only a single division.
 
+; CHECK: div
+; CHECK-NOT: div
+
 %0 = type { %0*, %0* }
 
 @0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index 6566f56..b2aea90 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -17,7 +17,7 @@
 ; ATOM-NEXT: movsd A(,%rax,8)
 ; ATOM-NEXT: mulsd
 ; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
+; ATOM-NEXT: leaq 1(%rax), %rax
 
 @A = external global [0 x double]
 
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
new file mode 100644
index 0000000..0450cfb
--- /dev/null
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; Test scheduling of copy instructions.
+;
+; Argument copies should be hoisted to the top of the block.
+; Return copies should be sunk to the end.
+; MUL_HiLo PhysReg use copies should be just above the mul.
+; MUL_HiLo PhysReg def copies should be just below the mul.
+;
+; CHECK:      *** Final schedule for BB#1 ***
+; CHECK-NEXT: %EAX<def> = COPY
+; CHECK:      MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK:      DIVSSrm
+define i64 @mulhoist(i32 %a, i32 %b) #0 {
+entry:
+  br label %body
+
+body:
+  %convb = sitofp i32 %b to float
+  ; The i64 multiply is legalized for the 32-bit target; the checks above expect a MUL32r.
+  %aa = zext i32 %a to i64
+  %mul = mul i64 %aa, 74383
+  ; Do some dependent long latency stuff.
+  %trunc = trunc i64 %mul to i32
+  %convm = sitofp i32 %trunc to float
+  %divm = fdiv float %convm, 0.75 ; NOTE(review): %divm has no live user; its only use is commented out below
+  ;%addmb = fadd float %divm, %convb
+  ;%divmb = fdiv float %addmb, 0.125
+  ; Do some independent long latency stuff.
+  %conva = sitofp i32 %a to float
+  %diva = fdiv float %conva, 0.75
+  %addab = fadd float %diva, %convb
+  %divab = fdiv float %addab, 0.125
+  br label %end
+
+end:
+  %val = fptosi float %divab to i64
+  %add = add i64 %mul, %val ; result depends on both the multiply and the independent fdiv chain
+  ret i64 %add
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"float", metadata !1} ; NOTE(review): no instruction in this file references !0-!2
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
index 0f6e442..15e8a0a 100644
--- a/test/CodeGen/X86/misched-matmul.ll
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -12,86 +12,86 @@
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:
   %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
-  %0 = load double* %arrayidx1.i, align 8, !tbaa !0
+  %0 = load double* %arrayidx1.i, align 8
   %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
-  %1 = load double* %arrayidx3.i, align 8, !tbaa !0
+  %1 = load double* %arrayidx3.i, align 8
   %mul.i = fmul double %0, %1
   %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
-  %2 = load double* %arrayidx5.i, align 8, !tbaa !0
+  %2 = load double* %arrayidx5.i, align 8
   %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
-  %3 = load double* %arrayidx7.i, align 8, !tbaa !0
+  %3 = load double* %arrayidx7.i, align 8
   %mul8.i = fmul double %2, %3
   %add.i = fadd double %mul.i, %mul8.i
   %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
-  %4 = load double* %arrayidx10.i, align 8, !tbaa !0
+  %4 = load double* %arrayidx10.i, align 8
   %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
-  %5 = load double* %arrayidx12.i, align 8, !tbaa !0
+  %5 = load double* %arrayidx12.i, align 8
   %mul13.i = fmul double %4, %5
   %add14.i = fadd double %add.i, %mul13.i
   %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
-  %6 = load double* %arrayidx16.i, align 8, !tbaa !0
+  %6 = load double* %arrayidx16.i, align 8
   %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
-  %7 = load double* %arrayidx18.i, align 8, !tbaa !0
+  %7 = load double* %arrayidx18.i, align 8
   %mul19.i = fmul double %6, %7
   %add20.i = fadd double %add14.i, %mul19.i
   %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
-  %8 = load double* %arrayidx25.i, align 8, !tbaa !0
+  %8 = load double* %arrayidx25.i, align 8
   %mul26.i = fmul double %0, %8
   %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
-  %9 = load double* %arrayidx30.i, align 8, !tbaa !0
+  %9 = load double* %arrayidx30.i, align 8
   %mul31.i = fmul double %2, %9
   %add32.i = fadd double %mul26.i, %mul31.i
   %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
-  %10 = load double* %arrayidx36.i, align 8, !tbaa !0
+  %10 = load double* %arrayidx36.i, align 8
   %mul37.i = fmul double %4, %10
   %add38.i = fadd double %add32.i, %mul37.i
   %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
-  %11 = load double* %arrayidx42.i, align 8, !tbaa !0
+  %11 = load double* %arrayidx42.i, align 8
   %mul43.i = fmul double %6, %11
   %add44.i = fadd double %add38.i, %mul43.i
   %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
-  %12 = load double* %arrayidx49.i, align 8, !tbaa !0
+  %12 = load double* %arrayidx49.i, align 8
   %mul50.i = fmul double %0, %12
   %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
-  %13 = load double* %arrayidx54.i, align 8, !tbaa !0
+  %13 = load double* %arrayidx54.i, align 8
   %mul55.i = fmul double %2, %13
   %add56.i = fadd double %mul50.i, %mul55.i
   %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
-  %14 = load double* %arrayidx60.i, align 8, !tbaa !0
+  %14 = load double* %arrayidx60.i, align 8
   %mul61.i = fmul double %4, %14
   %add62.i = fadd double %add56.i, %mul61.i
   %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
-  %15 = load double* %arrayidx66.i, align 8, !tbaa !0
+  %15 = load double* %arrayidx66.i, align 8
   %mul67.i = fmul double %6, %15
   %add68.i = fadd double %add62.i, %mul67.i
   %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
-  %16 = load double* %arrayidx73.i, align 8, !tbaa !0
+  %16 = load double* %arrayidx73.i, align 8
   %mul74.i = fmul double %0, %16
   %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
-  %17 = load double* %arrayidx78.i, align 8, !tbaa !0
+  %17 = load double* %arrayidx78.i, align 8
   %mul79.i = fmul double %2, %17
   %add80.i = fadd double %mul74.i, %mul79.i
   %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
-  %18 = load double* %arrayidx84.i, align 8, !tbaa !0
+  %18 = load double* %arrayidx84.i, align 8
   %mul85.i = fmul double %4, %18
   %add86.i = fadd double %add80.i, %mul85.i
   %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
-  %19 = load double* %arrayidx90.i, align 8, !tbaa !0
+  %19 = load double* %arrayidx90.i, align 8
   %mul91.i = fmul double %6, %19
   %add92.i = fadd double %add86.i, %mul91.i
   %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
-  %20 = load double* %arrayidx95.i, align 8, !tbaa !0
+  %20 = load double* %arrayidx95.i, align 8
   %mul98.i = fmul double %1, %20
   %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
-  %21 = load double* %arrayidx100.i, align 8, !tbaa !0
+  %21 = load double* %arrayidx100.i, align 8
   %mul103.i = fmul double %3, %21
   %add104.i = fadd double %mul98.i, %mul103.i
   %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
-  %22 = load double* %arrayidx106.i, align 8, !tbaa !0
+  %22 = load double* %arrayidx106.i, align 8
   %mul109.i = fmul double %5, %22
   %add110.i = fadd double %add104.i, %mul109.i
   %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
-  %23 = load double* %arrayidx112.i, align 8, !tbaa !0
+  %23 = load double* %arrayidx112.i, align 8
   %mul115.i = fmul double %7, %23
   %add116.i = fadd double %add110.i, %mul115.i
   %mul122.i = fmul double %8, %20
@@ -116,18 +116,18 @@ entry:
   %mul187.i = fmul double %19, %23
   %add188.i = fadd double %add182.i, %mul187.i
   %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
-  %24 = load double* %arrayidx191.i, align 8, !tbaa !0
+  %24 = load double* %arrayidx191.i, align 8
   %mul194.i = fmul double %1, %24
   %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
-  %25 = load double* %arrayidx196.i, align 8, !tbaa !0
+  %25 = load double* %arrayidx196.i, align 8
   %mul199.i = fmul double %3, %25
   %add200.i = fadd double %mul194.i, %mul199.i
   %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
-  %26 = load double* %arrayidx202.i, align 8, !tbaa !0
+  %26 = load double* %arrayidx202.i, align 8
   %mul205.i = fmul double %5, %26
   %add206.i = fadd double %add200.i, %mul205.i
   %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
-  %27 = load double* %arrayidx208.i, align 8, !tbaa !0
+  %27 = load double* %arrayidx208.i, align 8
   %mul211.i = fmul double %7, %27
   %add212.i = fadd double %add206.i, %mul211.i
   %mul218.i = fmul double %8, %24
@@ -152,18 +152,18 @@ entry:
   %mul283.i = fmul double %19, %27
   %add284.i = fadd double %add278.i, %mul283.i
   %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
-  %28 = load double* %arrayidx287.i, align 8, !tbaa !0
+  %28 = load double* %arrayidx287.i, align 8
   %mul290.i = fmul double %1, %28
   %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
-  %29 = load double* %arrayidx292.i, align 8, !tbaa !0
+  %29 = load double* %arrayidx292.i, align 8
   %mul295.i = fmul double %3, %29
   %add296.i = fadd double %mul290.i, %mul295.i
   %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
-  %30 = load double* %arrayidx298.i, align 8, !tbaa !0
+  %30 = load double* %arrayidx298.i, align 8
   %mul301.i = fmul double %5, %30
   %add302.i = fadd double %add296.i, %mul301.i
   %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
-  %31 = load double* %arrayidx304.i, align 8, !tbaa !0
+  %31 = load double* %arrayidx304.i, align 8
   %mul307.i = fmul double %7, %31
   %add308.i = fadd double %add302.i, %mul307.i
   %mul314.i = fmul double %8, %28
@@ -222,7 +222,3 @@ entry:
 }
 
 attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index f5566e5..4dc95c5 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -94,57 +94,57 @@ entry:
 for.body:                              ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
-  %tmp = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp = load i32* %arrayidx8, align 4
   %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
-  %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
+  %tmp1 = load i32* %arrayidx12, align 4
   %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
-  %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp2 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
-  %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
+  %tmp3 = load i32* %arrayidx12.1, align 4
   %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
-  %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp4 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
-  %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
+  %tmp5 = load i32* %arrayidx12.2, align 4
   %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
-  %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp6 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
-  %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp8 = load i32* %arrayidx8, align 4
   %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
-  %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
-  %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp9 = load i32* %arrayidx12.137, align 4
+  %tmp10 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
-  %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
-  %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp11 = load i32* %arrayidx12.1.1, align 4
+  %tmp12 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
-  %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
-  %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp13 = load i32* %arrayidx12.2.1, align 4
+  %tmp14 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
-  %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
-  %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp15 = load i32* %arrayidx12.3.1, align 4
+  %tmp16 = load i32* %arrayidx8, align 4
   %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
-  %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
-  %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp17 = load i32* %arrayidx12.239, align 4
+  %tmp18 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
-  %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
-  %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp19 = load i32* %arrayidx12.1.2, align 4
+  %tmp20 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
-  %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
-  %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp21 = load i32* %arrayidx12.2.2, align 4
+  %tmp22 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
-  %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
-  %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp23 = load i32* %arrayidx12.3.2, align 4
+  %tmp24 = load i32* %arrayidx8, align 4
   %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
-  %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
-  %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp25 = load i32* %arrayidx12.341, align 4
+  %tmp26 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
-  %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
-  %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp27 = load i32* %arrayidx12.1.3, align 4
+  %tmp28 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
-  %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
-  %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp29 = load i32* %arrayidx12.2.3, align 4
+  %tmp30 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
-  %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
-  %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
+  %tmp31 = load i32* %arrayidx12.3.3, align 4
+  %tmp7 = load i32* %arrayidx12.3, align 4
   %mul = mul nsw i32 %tmp1, %tmp
   %mul.1 = mul nsw i32 %tmp3, %tmp2
   %mul.2 = mul nsw i32 %tmp5, %tmp4
@@ -174,13 +174,13 @@ for.body:                              ; preds = %for.body, %entry
   %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
   %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
   %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
-  store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
+  store i32 %add.3, i32* %arrayidx16, align 4
   %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
-  store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
+  store i32 %add.3.1, i32* %arrayidx16.1, align 4
   %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
-  store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
+  store i32 %add.3.2, i32* %arrayidx16.2, align 4
   %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
-  store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
+  store i32 %add.3.3, i32* %arrayidx16.3, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 4
@@ -189,7 +189,3 @@ for.body:                              ; preds = %for.body, %entry
 for.end:                                        ; preds = %for.body
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index d9c7c67..33dd2eb 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | grep pinsr
+; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
 ; PR2562
 
+; CHECK: pinsr
+
 external global i16		; <i16*>:0 [#uses=1]
 external global <4 x i16>		; <<4 x i16>*>:1 [#uses=2]
 
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index 069737d..339de31 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 | grep 24576
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR2135
 
+; CHECK: 24576
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 @.str = constant [13 x i8] c"c45531m.adb\00\00"		
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
index 29474c2..c8c2cd7 100644
--- a/test/CodeGen/X86/negative_zero.ll
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | FileCheck %s
+
+; CHECK: fchs
 
 
 define double @T() {
diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll
new file mode 100644
index 0000000..627f7da
--- /dev/null
+++ b/test/CodeGen/X86/no-compact-unwind.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck %s
+
+%"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
+%struct.anon = type { %class.ImageLoader*, i64, i64 }
+%class.ImageLoader = type { i32 (...)**, i8*, i8*, i32, i64, i64, i32, i32, %"struct.ImageLoader::recursive_lock"*, i16, i16, [4 x i8] }
+%"struct.ImageLoader::recursive_lock" = type { i32, i32 }
+
+@G1 = external hidden global %"struct.dyld::MappedRanges", align 8
+
+declare void @OSMemoryBarrier() optsize
+
+; The compact unwind encoding checked below (0x4000000) indicates that no
+; correct compact unwind encoding could be generated for this function, so
+; unwinding falls back to the DWARF EH frame.
+;
+; CHECK: .section __LD,__compact_unwind,regular,debug
+; CHECK: .quad _func
+; CHECK: .long 67108864                ## Compact Unwind Encoding: 0x4000000
+; CHECK: .quad 0                       ## Personality Function
+; CHECK: .quad 0                       ## LSDA
+;
+define void @func(%class.ImageLoader* %image) optsize ssp uwtable {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc10, %entry
+  %p.019 = phi %"struct.dyld::MappedRanges"* [ @G1, %entry ], [ %1, %for.inc10 ] ; current node, starting at @G1
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ] ; index into the node's array
+  %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0 ; address of the ImageLoader* in array element %indvars.iv
+  %0 = load %class.ImageLoader** %image4, align 8
+  %cmp5 = icmp eq %class.ImageLoader* %0, %image ; does this slot hold %image?
+  br i1 %cmp5, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body3
+  tail call void @OSMemoryBarrier() optsize
+  store %class.ImageLoader* null, %class.ImageLoader** %image4, align 8 ; clear the matching slot
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 400 ; 400 = length of the array in %"struct.dyld::MappedRanges"
+  br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:                                        ; preds = %for.inc
+  %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1 ; field 1: pointer to another MappedRanges
+  %1 = load %"struct.dyld::MappedRanges"** %next, align 8
+  %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null ; a null pointer ends the walk
+  br i1 %cmp, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc10
+  ret void
+}
diff --git a/test/CodeGen/X86/nosse-error1.ll b/test/CodeGen/X86/nosse-error1.ll
index 16cbb73..cddff3f 100644
--- a/test/CodeGen/X86/nosse-error1.ll
+++ b/test/CodeGen/X86/nosse-error1.ll
@@ -1,7 +1,10 @@
-; RUN: llvm-as < %s > %t1
-; RUN: not llc -march=x86-64 -mattr=-sse < %t1 2> %t2
-; RUN: grep "SSE register return with SSE disabled" %t2
-; RUN: llc -march=x86-64 < %t1 | grep xmm
+; RUN: not llc < %s -march=x86-64 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; With SSE disabled llc is expected to fail, so its exit status is inverted with "not" and the diagnostic is read from stderr.
+; NOSSE: {{SSE register return with SSE disabled}}
+
+; CHECK: xmm
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 @f = external global float		; <float*> [#uses=4]
diff --git a/test/CodeGen/X86/nosse-error2.ll b/test/CodeGen/X86/nosse-error2.ll
index 45a5eaf..fc9ba01 100644
--- a/test/CodeGen/X86/nosse-error2.ll
+++ b/test/CodeGen/X86/nosse-error2.ll
@@ -1,7 +1,10 @@
-; RUN: llvm-as < %s > %t1
-; RUN: not llc -march=x86 -mcpu=i686 -mattr=-sse < %t1 2> %t2
-; RUN: grep "SSE register return with SSE disabled" %t2
-; RUN: llc -march=x86 -mcpu=i686 -mattr=+sse < %t1 | grep xmm
+; RUN: not llc < %s -march=x86 -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: llc < %s -march=x86 -mcpu=i686 -mattr=+sse | FileCheck %s
+; With SSE disabled llc is expected to fail, so its exit status is inverted with "not" and the diagnostic is read from stderr.
+; NOSSE: {{SSE register return with SSE disabled}}
+
+; CHECK: xmm
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-unknown-linux-gnu"
 @f = external global float		; <float*> [#uses=4]
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index 8851c5b1a..10ab831 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep cmov %t | count 2
-; RUN: grep jne %t | count 1
+; RUN: llc < %s -march=x86-64 | grep cmov | count 2
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: jne
+; CHECK-NOT: jne
 
 ; LSR's OptimizeMax function shouldn't try to eliminate this max, because
 ; it has three operands.
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index 2745172..e4bafbb 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 | grep testl
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: testl
 
 ; It's tempting to eliminate the testl instruction here and just use the
 ; EFLAGS value from the incl, however it can't be known whether the add
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 2c855ce..8b2f61e 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: movl
 
 	%struct.dpoint = type { double, double }
 
diff --git a/test/CodeGen/X86/pr12889.ll b/test/CodeGen/X86/pr12889.ll
index 331d8f9..428e9b7 100644
--- a/test/CodeGen/X86/pr12889.ll
+++ b/test/CodeGen/X86/pr12889.ll
@@ -6,13 +6,10 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @func() nounwind uwtable {
 entry:
-  %0 = load i8* @c0, align 1, !tbaa !0
+  %0 = load i8* @c0, align 1
   %tobool = icmp ne i8 %0, 0
   %conv = zext i1 %tobool to i8
   %storemerge = shl nuw nsw i8 %conv, %conv
   store i8 %storemerge, i8* @c0, align 1
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index f0e31f7..1122d2d 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep "xorps.*sp" | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 ; PR2656
 
+; CHECK:     {{xorps.*sp}}
+; CHECK-NOT: {{xorps.*sp}}
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.4.0"
 	%struct.anon = type <{ float, float }>
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index 8aa744e..4413cee 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
 ; Quote should be outside of private prefix.
 ; rdar://6855766x
 
+; CHECK: L__ZZ20
+
 	%struct.A = type { i32*, i32 }
 @"_ZZ20-[Example1 whatever]E4C.91" = private constant %struct.A { i32* null, i32 1 }		; <%struct.A*> [#uses=1]
 
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index 8ef9b5d..0bf601b 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -8,9 +8,9 @@ entry:
 ; CHECK: decq	(%{{rdi|rcx}})
 ; CHECK-NEXT: je
   %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
-  %0 = load i64* %refcnt, align 8, !tbaa !0
+  %0 = load i64* %refcnt, align 8
   %dec = add i64 %0, -1
-  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+  store i64 %dec, i64* %refcnt, align 8
   %tobool = icmp eq i64 %dec, 0
   br i1 %tobool, label %if.end, label %return
 
@@ -33,12 +33,12 @@ define i32 @test() nounwind uwtable ssp {
 entry:
 ; CHECK: decq
 ; CHECK-NOT: decq
-%0 = load i64* @c, align 8, !tbaa !0
+%0 = load i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
+store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %dec.i, 0
 %lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+store i32 %lor.ext.i, i32* @a, align 4
 %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
 ret i32 0
 }
@@ -47,12 +47,12 @@ ret i32 0
 define i32 @test2() nounwind uwtable ssp {
 entry:
 ; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8, !tbaa !0
+%0 = load i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
+store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %0, 0
 %lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+store i32 %lor.ext.i, i32* @a, align 4
 %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
 ret i32 0
 }
@@ -61,11 +61,6 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare void @free(i8* nocapture) nounwind
 
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
-
 %struct.obj2 = type { i64, i32, i16, i8 }
 
 declare void @other(%struct.obj2* ) nounwind;
diff --git a/test/CodeGen/X86/select-with-and-or.ll b/test/CodeGen/X86/select-with-and-or.ll
new file mode 100644
index 0000000..1ccf30b
--- /dev/null
+++ b/test/CodeGen/X86/select-with-and-or.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -O3 | \
+; RUN:	llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @test1(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> zeroinitializer ; select(f, c, 0): the checks want cmpnle directly followed by andps
+  ret <4 x i32> %r
+; CHECK: test1
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c ; select(f, -1, c): the checks want cmpnle directly followed by orps
+  ret <4 x i32> %r
+; CHECK: test2
+; CHECK: cmpnle
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test3(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> %c ; select(f, 0, c): the checks want cmple directly followed by andps
+  ret <4 x i32> %r
+; CHECK: test3
+; CHECK: cmple
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test4(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; select(f, c, -1): the checks want cmple directly followed by orps
+  ret <4 x i32> %r
+; CHECK: test4
+; CHECK: cmple
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test5(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer ; select(f, -1, 0): the checks want cmpnle directly followed by ret
+  ret <4 x i32> %r
+; CHECK: test5
+; CHECK: cmpnle
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; select(f, 0, -1): the checks want cmple directly followed by ret
+  ret <4 x i32> %r
+; CHECK: test6
+; CHECK: cmple
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
+  %f = fcmp ult <4 x float> %a, %b
+  %s = sext <4 x i1> %f to <4 x i32>
+  %l = load <4 x i32>* %p
+  %r = and <4 x i32> %l, %s ; sign-extended compare result ANDed with a loaded vector: the checks want cmpnle directly followed by andps
+  ret <4 x i32> %r
+; CHECK: test7
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index f364d1f..333c466 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -4,6 +4,7 @@
 
 ; Combine sin / cos into a single call.
 ; rdar://13087969
+; rdar://13599493
 
 define float @test1(float %x) nounwind {
 entry:
@@ -14,7 +15,8 @@ entry:
 
 ; OSX_SINCOS: test1:
 ; OSX_SINCOS: callq ___sincosf_stret
-; OSX_SINCOS: addss %xmm1, %xmm0
+; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
+; OSX_SINCOS: addss %xmm0, %xmm1
 
 ; OSX_NOOPT: test1
 ; OSX_NOOPT: callq _cosf
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index a7c2517..73826ed 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple="i386-pc-mingw32" < %s | FileCheck %s
 ; PR5851
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-mingw32"
-
 %0 = type { void (...)* }
 
-@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
-; CHECK: _B:
-; CHECK: .long _MyFunc@0
-
 define internal x86_stdcallcc void @MyFunc() nounwind {
 entry:
+; CHECK: MyFunc@0:
+; CHECK: ret
   ret void
 }
+
+; PR14410
+define x86_stdcallcc i32 @"\01DoNotMangle"(i32 %a) {
+; CHECK: DoNotMangle:
+; CHECK: ret $4
+entry:
+  ret i32 %a
+}
+
+@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
+; CHECK: _B:
+; CHECK: .long _MyFunc@0
+
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
index 206886b..71df8d3 100644
--- a/test/CodeGen/X86/store-fp-constant.ll
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86 | not grep rodata
-; RUN: llc < %s -march=x86 | not grep literal
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK-NOT: rodata
+; CHECK-NOT: literal
+
 ;
 ; Check that no FP constants in this testcase ends up in the 
 ; constant pool.
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index 4f31ab5..2931bab 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 | grep "leal	.*), %e.*" | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK:     {{leal	.*[)], %e.*}}
+; CHECK-NOT: {{leal	.*[)], %e.*}}
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 ; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
index 931ae75..80ab1a2 100644
--- a/test/CodeGen/X86/subreg-to-reg-3.ll
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep imull
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: imull
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
index cd67729..04d4a71 100644
--- a/test/CodeGen/X86/subtarget-feature-change.ll
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -14,12 +14,12 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %mul = fmul float %0, %1
   %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  store float %mul, float* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -43,12 +43,12 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %mul = fmul float %0, %1
   %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  store float %mul, float* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -60,7 +60,3 @@ for.end:
 
 attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
 attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/switch-crit-edge-constant.ll b/test/CodeGen/X86/switch-crit-edge-constant.ll
index 1f2ab0d..18f987e 100644
--- a/test/CodeGen/X86/switch-crit-edge-constant.ll
+++ b/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -1,6 +1,8 @@
 ; PR925
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep mov.*str1 | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK:      {{mov.*str1}}
+; CHECK-NOT:  {{mov.*str1}}
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8.7.2"
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index ecc253b..60fe776 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -50,9 +50,18 @@ define {i64, i64} @test_pair_trivial() {
 ; CHECK: test_pair_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
+define {i64, i64} @test_pair_notail() {        ; negative case: no tail jump to _testi is expected
+  %A = tail call i64 @testi()                  ; callee yields a single i64, not a pair
+
+  %b = insertvalue {i64, i64} undef, i64 %A, 0 ; that one result is placed in field 0 ...
+  %c = insertvalue {i64, i64} %b, i64 %A, 1    ; ... and duplicated into field 1
 
+  ret { i64, i64} %c
+}
+; CHECK: test_pair_notail:
+; CHECK-NOT: jmp	_testi
 
-define {i64, i64} @test_pair_trivial_extract() {
+define {i64, i64} @test_pair_extract_trivial() {
   %A = tail call { i64, i64} @testp()
   %x = extractvalue { i64, i64} %A, 0
   %y = extractvalue { i64, i64} %A, 1
@@ -63,10 +72,24 @@ define {i64, i64} @test_pair_trivial_extract() {
   ret { i64, i64} %c
 }
 
-; CHECK: test_pair_trivial_extract:
+; CHECK: test_pair_extract_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
-define {i8*, i64} @test_pair_conv_extract() {
+define {i64, i64} @test_pair_extract_notail() { ; negative case: no tail jump to _testp is expected
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %b = insertvalue {i64, i64} undef, i64 %y, 0  ; callee's field 1 goes into field 0 ...
+  %c = insertvalue {i64, i64} %b, i64 %x, 1     ; ... and field 0 into field 1: the pair is swapped
+  
+  ret { i64, i64} %c
+}
+
+; CHECK: test_pair_extract_notail:
+; CHECK-NOT: jmp	_testp
+
+define {i8*, i64} @test_pair_extract_conv() {
   %A = tail call { i64, i64} @testp()
   %x = extractvalue { i64, i64} %A, 0
   %y = extractvalue { i64, i64} %A, 1
@@ -79,10 +102,75 @@ define {i8*, i64} @test_pair_conv_extract() {
   ret { i8*, i64} %c
 }
 
-; CHECK: test_pair_conv_extract:
+; CHECK: test_pair_extract_conv:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+define {i64, i64} @test_pair_extract_multiple() { ; two unpack/re-pack rounds; a tail jump to _testp is still expected
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %b = insertvalue {i64, i64} undef, i64 %x, 0    ; round 1: rebuild the pair, fields kept in place
+  %c = insertvalue {i64, i64} %b, i64 %y, 1
+
+  %x1 = extractvalue { i64, i64} %b, 0            ; unpack the rebuilt values again
+  %y1 = extractvalue { i64, i64} %c, 1
+
+  %d = insertvalue {i64, i64} undef, i64 %x1, 0   ; round 2: start a fresh aggregate from undef ...
+  %e = insertvalue {i64, i64} %d, i64 %y1, 1      ; ... and finish it from %d (was %b, which left %d dead)
+  
+  ret { i64, i64} %e
+}
+
+; CHECK: test_pair_extract_multiple:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+define {i64, i64} @test_pair_extract_undef() { ; a tail jump to _testp is expected
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  
+  %b = insertvalue {i64, i64} undef, i64 %x, 0 ; only field 0 is forwarded; field 1 stays undef
+  
+  ret { i64, i64} %b
+}
+
+; CHECK: test_pair_extract_undef:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
+declare { i64, { i32, i32 } } @testn()
+
+define {i64, {i32, i32}} @test_nest() {          ; nested aggregate; a tail jump to _testn is expected
+  %A = tail call { i64, { i32, i32 } } @testn()
+  %x = extractvalue { i64, { i32, i32}} %A, 0
+  %y = extractvalue { i64, { i32, i32}} %A, 1    ; inner { i32, i32 } struct
+  %y1 = extractvalue { i32, i32} %y, 0
+  %y2 = extractvalue { i32, i32} %y, 1
+  
+  %b = insertvalue {i64, {i32, i32}} undef, i64 %x, 0
+  %c1 = insertvalue {i32, i32} undef, i32 %y1, 0 ; inner struct rebuilt with both fields in place
+  %c2 = insertvalue {i32, i32} %c1, i32 %y2, 1
+  %c = insertvalue {i64, {i32, i32}} %b, {i32, i32} %c2, 1
+ 
+  ret { i64, { i32, i32}} %c
+}
+
+; CHECK: test_nest:
+; CHECK: jmp	_testn                  ## TAILCALL
+
+%struct.A = type { i32 }
+%struct.B = type { %struct.A, i32 }
+
+declare %struct.B* @testu()
+
+define %struct.A* @test_upcast() {                         ; a tail jump to _testu is expected
+entry:
+  %A = tail call %struct.B* @testu()
+  %x = getelementptr inbounds %struct.B* %A, i32 0, i32 0  ; address of field 0, the embedded %struct.A
+  ret %struct.A* %x
+}
 
+; CHECK: test_upcast:
+; CHECK: jmp	_testu                  ## TAILCALL
 
 ; PR13006
 define { i64, i64 } @crash(i8* %this) {
diff --git a/test/CodeGen/X86/this-return-64.ll b/test/CodeGen/X86/this-return-64.ll
new file mode 100644
index 0000000..2b26a89
--- /dev/null
+++ b/test/CodeGen/X86/this-return-64.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+%struct.A = type { i8 }
+%struct.B = type { i32 }
+%struct.C = type { %struct.B }
+%struct.D = type { %struct.B }
+%struct.E = type { %struct.B }
+
+declare %struct.A* @A_ctor(%struct.A* returned)
+declare %struct.B* @B_ctor(%struct.B* returned, i32)
+
+declare %struct.A* @A_ctor_nothisret(%struct.A*)
+declare %struct.B* @B_ctor_nothisret(%struct.B*, i32)
+
+define %struct.C* @C_ctor(%struct.C* %this, i32 %y) {
+entry:
+; CHECK: C_ctor:
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0    ; field 0 of %this, the %struct.B member
+  %call = tail call %struct.B* @B_ctor(%struct.B* %0, i32 %y)   ; @B_ctor's first parameter is declared 'returned'
+  ret %struct.C* %this                                          ; returns %this, not %call
+}
+
+define %struct.C* @C_ctor_nothisret(%struct.C* %this, i32 %y) {
+entry:
+; CHECK: C_ctor_nothisret:
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %0, i32 %y) ; callee is declared without 'returned'
+  ret %struct.C* %this                                                  ; returns %this, not %call
+}
+
+define %struct.D* @D_ctor(%struct.D* %this, i32 %y) {
+entry:
+; CHECK: D_ctor:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   A_ctor
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %0 = bitcast %struct.D* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor(%struct.A* %0)           ; first call; its argument is declared 'returned'
+  %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor(%struct.B* %1, i32 %y)  ; second call, expected to become the tail jump
+; (this next line would never be generated by Clang, actually)
+  %2 = bitcast %struct.A* %call to %struct.D*                   ; return value derives from the first call's result
+  ret %struct.D* %2
+}
+
+define %struct.D* @D_ctor_nothisret(%struct.D* %this, i32 %y) {
+entry:
+; CHECK: D_ctor_nothisret:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   A_ctor_nothisret
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %0 = bitcast %struct.D* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_nothisret(%struct.A* %0)           ; callee is declared without 'returned'
+  %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %1, i32 %y)  ; no tail jump is expected for this call
+; (this next line would never be generated by Clang, actually)
+  %2 = bitcast %struct.A* %call to %struct.D*                             ; return value derives from the first call's result
+  ret %struct.D* %2
+}
+
+define %struct.E* @E_ctor(%struct.E* %this, i32 %x) {
+entry:
+; CHECK: E_ctor:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   B_ctor
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)   ; same callee and arguments twice:
+  %call4 = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)  ; a plain call, then the expected tail jump
+  ret %struct.E* %this
+}
+
+define %struct.E* @E_ctor_nothisret(%struct.E* %this, i32 %x) {
+entry:
+; CHECK: E_ctor_nothisret:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   B_ctor_nothisret
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)   ; callee is declared without 'returned'
+  %call4 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)  ; no tail jump is expected for this call
+  ret %struct.E* %this
+}
diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll
index a438723..9bbe980 100644
--- a/test/CodeGen/X86/unwindraise.ll
+++ b/test/CodeGen/X86/unwindraise.ll
@@ -50,12 +50,12 @@ while.body:                                       ; preds = %uw_update_context.e
   ]
 
 if.end3:                                          ; preds = %while.body
-  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8, !tbaa !0
+  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
   %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
   br i1 %tobool, label %if.end13, label %if.then4
 
 if.then4:                                         ; preds = %if.end3
-  %5 = load i64* %exception_class, align 8, !tbaa !3
+  %5 = load i64* %exception_class, align 8
   %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call6, label %do.end21.loopexit46 [
     i32 6, label %while.end
@@ -64,7 +64,7 @@ if.then4:                                         ; preds = %if.end3
 
 if.end13:                                         ; preds = %if.then4, %if.end3
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
-  %6 = load i64* %retaddr_column.i, align 8, !tbaa !3
+  %6 = load i64* %retaddr_column.i, align 8
   %conv.i = trunc i64 %6 to i32
   %cmp.i.i.i = icmp slt i32 %conv.i, 18
   br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
@@ -77,17 +77,17 @@ cond.end.i.i.i:                                   ; preds = %if.end13
   %sext.i = shl i64 %6, 32
   %idxprom.i.i.i = ashr exact i64 %sext.i, 32
   %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
-  %7 = load i8* %arrayidx.i.i.i, align 1, !tbaa !1
+  %7 = load i8* %arrayidx.i.i.i, align 1
   %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
-  %8 = load i8** %arrayidx2.i.i.i, align 8, !tbaa !0
-  %9 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %8 = load i8** %arrayidx2.i.i.i, align 8
+  %9 = load i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i = and i64 %9, 4611686018427387904
   %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
   br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
 
 land.lhs.true.i.i.i:                              ; preds = %cond.end.i.i.i
   %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
-  %10 = load i8* %arrayidx4.i.i.i, align 1, !tbaa !1
+  %10 = load i8* %arrayidx4.i.i.i, align 1
   %tobool6.i.i.i = icmp eq i8 %10, 0
   br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
 
@@ -101,7 +101,7 @@ if.end.i.i.i:                                     ; preds = %land.lhs.true.i.i.i
 
 if.then10.i.i.i:                                  ; preds = %if.end.i.i.i
   %12 = bitcast i8* %8 to i64*
-  %13 = load i64* %12, align 8, !tbaa !3
+  %13 = load i64* %12, align 8
   br label %uw_update_context.exit
 
 cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
@@ -111,16 +111,16 @@ cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
 uw_update_context.exit:                           ; preds = %if.then10.i.i.i, %if.then.i.i.i
   %retval.0.i.i.i = phi i64 [ %11, %if.then.i.i.i ], [ %13, %if.then10.i.i.i ]
   %14 = inttoptr i64 %retval.0.i.i.i to i8*
-  store i8* %14, i8** %ra.i, align 8, !tbaa !0
+  store i8* %14, i8** %ra.i, align 8
   br label %while.body
 
 while.end:                                        ; preds = %if.then4
   %private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2
-  store i64 0, i64* %private_1, align 8, !tbaa !3
-  %15 = load i8** %ra.i, align 8, !tbaa !0
+  store i64 0, i64* %private_1, align 8
+  %15 = load i8** %ra.i, align 8
   %16 = ptrtoint i8* %15 to i64
   %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3
-  store i64 %16, i64* %private_2, align 8, !tbaa !3
+  store i64 %16, i64* %private_2, align 8
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
   %17 = bitcast %struct._Unwind_FrameState* %fs.i to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %17)
@@ -130,21 +130,21 @@ while.end:                                        ; preds = %if.then4
 
 while.body.i:                                     ; preds = %uw_update_context.exit44, %while.end
   %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %18 = load i8** %ra.i, align 8, !tbaa !0
+  %18 = load i8** %ra.i, align 8
   %19 = ptrtoint i8* %18 to i64
-  %20 = load i64* %private_2, align 8, !tbaa !3
+  %20 = load i64* %private_2, align 8
   %cmp.i = icmp eq i64 %19, %20
   %cmp2.i = icmp eq i32 %call.i, 0
   br i1 %cmp2.i, label %if.end.i, label %do.end21
 
 if.end.i:                                         ; preds = %while.body.i
-  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8, !tbaa !0
+  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
   %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
   br i1 %tobool.i, label %if.end12.i, label %if.then3.i
 
 if.then3.i:                                       ; preds = %if.end.i
   %or.i = select i1 %cmp.i, i32 6, i32 2
-  %22 = load i64* %exception_class, align 8, !tbaa !3
+  %22 = load i64* %exception_class, align 8
   %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call5.i, label %do.end21 [
     i32 7, label %do.body19
@@ -160,7 +160,7 @@ cond.true.i:                                      ; preds = %if.end12.i
 
 cond.end.i:                                       ; preds = %if.end12.i
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %23 = load i64* %retaddr_column.i22, align 8, !tbaa !3
+  %23 = load i64* %retaddr_column.i22, align 8
   %conv.i23 = trunc i64 %23 to i32
   %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
   br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
@@ -173,17 +173,17 @@ cond.end.i.i.i33:                                 ; preds = %cond.end.i
   %sext.i26 = shl i64 %23, 32
   %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
   %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
-  %24 = load i8* %arrayidx.i.i.i28, align 1, !tbaa !1
+  %24 = load i8* %arrayidx.i.i.i28, align 1
   %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
-  %25 = load i8** %arrayidx2.i.i.i29, align 8, !tbaa !0
-  %26 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %25 = load i8** %arrayidx2.i.i.i29, align 8
+  %26 = load i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i31 = and i64 %26, 4611686018427387904
   %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
   br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
 
 land.lhs.true.i.i.i36:                            ; preds = %cond.end.i.i.i33
   %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
-  %27 = load i8* %arrayidx4.i.i.i34, align 1, !tbaa !1
+  %27 = load i8* %arrayidx4.i.i.i34, align 1
   %tobool6.i.i.i35 = icmp eq i8 %27, 0
   br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
 
@@ -197,7 +197,7 @@ if.end.i.i.i39:                                   ; preds = %land.lhs.true.i.i.i
 
 if.then10.i.i.i40:                                ; preds = %if.end.i.i.i39
   %29 = bitcast i8* %25 to i64*
-  %30 = load i64* %29, align 8, !tbaa !3
+  %30 = load i64* %29, align 8
   br label %uw_update_context.exit44
 
 cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
@@ -207,13 +207,13 @@ cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
 uw_update_context.exit44:                         ; preds = %if.then10.i.i.i40, %if.then.i.i.i37
   %retval.0.i.i.i42 = phi i64 [ %28, %if.then.i.i.i37 ], [ %30, %if.then10.i.i.i40 ]
   %31 = inttoptr i64 %retval.0.i.i.i42 to i8*
-  store i8* %31, i8** %ra.i, align 8, !tbaa !0
+  store i8* %31, i8** %ra.i, align 8
   br label %while.body.i
 
 do.body19:                                        ; preds = %if.then3.i
   call void @llvm.lifetime.end(i64 -1, i8* %17)
   %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
-  %32 = load i8** %ra.i, align 8, !tbaa !0
+  %32 = load i8** %ra.i, align 8
   call void @llvm.eh.return.i64(i64 %call20, i8* %32)
   unreachable
 
@@ -245,8 +245,3 @@ declare fastcc void @uw_update_context_1(%struct._Unwind_Context*, %struct._Unwi
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
index b5ebaa7..68d20a0 100644
--- a/test/CodeGen/X86/v4f32-immediate.ll
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+
+; CHECK: movaps
 
 define <4 x float> @foo() {
   ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
index 73d80eb..eeda5e1 100644
--- a/test/CodeGen/X86/vararg_tailcall.ll
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -39,7 +39,7 @@ declare void @bar2(i8*, i64) optsize noredzone
 ; WIN64: callq
 define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
 entry:
-  %tmp1 = load i8** @sel, align 8, !tbaa !0
+  %tmp1 = load i8** @sel, align 8
   %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
   ret i8* %call
 }
@@ -52,10 +52,10 @@ declare i8* @x2(i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
   ret i8* %call
 }
@@ -68,11 +68,11 @@ declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
-  %tmp6 = load i8** @sel7, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
+  %tmp6 = load i8** @sel7, align 8
   %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
   ret i8* %call
 }
@@ -85,14 +85,10 @@ declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
   ret i8* %call
 }
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index b6d91a3..fd5c234 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -65,3 +65,159 @@ define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
 	%D = sext <2 x i1> %C to <2 x i64>
 	ret <2 x i64> %D
 }
+
+define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind { ; expected constant: 0x80000000 in alternating 32-bit words, fed to two pxor
+; CHECK: [[CONSTSEG:[A-Z0-9_]*]]:
+; CHECK:      .long	2147483648
+; CHECK-NEXT: .long	0
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	0
+; CHECK: test7:
+; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp sgt <2 x i64> %A, %B       ; signed A > B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind { ; same expected sequence as test7, but pcmpgtd names %xmm0
+; CHECK: test8:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp slt <2 x i64> %A, %B       ; signed A < B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind { ; test8's sequence plus a trailing pcmpeqd/pxor (presumably an invert)
+; CHECK: test9:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sge <2 x i64> %A, %B       ; signed A >= B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind { ; test7's sequence plus a trailing pcmpeqd/pxor (presumably an invert)
+; CHECK: test10:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sle <2 x i64> %A, %B       ; signed A <= B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind { ; expected constant: 0x80000000 in all four 32-bit words, fed to two pxor
+; CHECK: [[CONSTSEG:[A-Z0-9_]*]]:
+; CHECK:      .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK: test11:
+; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ugt <2 x i64> %A, %B       ; unsigned A > B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind { ; same expected sequence as test11, but pcmpgtd names %xmm0
+; CHECK: test12:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ult <2 x i64> %A, %B       ; unsigned A < B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind { ; test12's sequence plus a trailing pcmpeqd/pxor (presumably an invert)
+; CHECK: test13:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp uge <2 x i64> %A, %B       ; unsigned A >= B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind { ; test11's sequence plus a trailing pcmpeqd/pxor (presumably an invert)
+; CHECK: test14:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp ule <2 x i64> %A, %B       ; unsigned A <= B, per 64-bit lane
+	%D = sext <2 x i1> %C to <2 x i64>   ; i1 -> i64: all-ones where the compare is true
+	ret <2 x i64> %D
+}
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index b8ec0cf..6979f6b 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 | grep "movlhps.*%xmm0, %xmm0"
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: test3
+; CHECK: movd
+; CHECK-NOT: movd
+; CHECK: {{movlhps.*%xmm0, %xmm0}}
+; CHECK-NEXT: ret
 
 define <2 x i64> @test3(i64 %A) nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
index f5b3e8b..5578eca 100644
--- a/test/CodeGen/X86/vec_set-B.ll
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
 
+; CHECK-NOT: movaps
+
 ; These should both generate something like this:
 ;_test3:
 ;	movl	$1234567, %eax
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
index 3d6369e..9c1e1ac 100644
--- a/test/CodeGen/X86/vec_set-D.ll
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK: movq
 
 define <4 x i32> @t(i32 %x, i32 %y) nounwind  {
 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
index 64f36f9..c5d6ab8 100644
--- a/test/CodeGen/X86/vec_set-I.ll
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: xorp
+; CHECK: movd
+; CHECK-NOT: xorp
 
 define void @t1() nounwind  {
 	%tmp298.i.i = load <4 x float>* null, align 16
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
index 343685b..ebf5577 100644
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep pshufb %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+
+; CHECK:     pshufb
+; CHECK-NOT: pshufb
 
 ; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
 ;        Don't XFAIL it because it's still better than the previous code.
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 41ea024..bda3fef 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,7 +1,13 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck -check-prefix CHECK2 %s
 ; 64-bit stores here do not use MMX.
 
+; CHECK: xorps
+; CHECK-NOT: xorps
+
+; CHECK2: pcmpeqd
+; CHECK2-NOT: pcmpeqd
+
 @M1 = external global <1 x i64>
 @M2 = external global <2 x i32>
 
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 46b0e18..82d20a2 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llc < %s -march=x86 -mcpu=i386 > %t
-; RUN: llc < %s -march=x86 -mcpu=yonah >> %t
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
 
 %d8 = type <8 x double>
 %f1 = type <1 x float>
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
new file mode 100644
index 0000000..f748a14
--- /dev/null
+++ b/test/CodeGen/X86/viabs.ll
@@ -0,0 +1,183 @@
+; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
+
+define <4 x i32> @test1(<4 x i32> %a) nounwind { ; per-lane absolute-value idiom: select(%a > -1, %a, -%a)
+; SSE2: test1:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test1:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test1:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> ; lane mask: signed %a > -1
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <4 x i32> %abs
+}
+
+define <4 x i32> @test2(<4 x i32> %a) nounwind { ; per-lane absolute-value idiom: select(%a >= 0, %a, -%a)
+; SSE2: test2:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test2:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test2:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sge <4 x i32> %a, zeroinitializer ; lane mask: signed %a >= 0
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <4 x i32> %abs
+}
+
+define <8 x i16> @test3(<8 x i16> %a) nounwind { ; per-lane absolute-value idiom on 16-bit lanes: select(%a > 0, %a, -%a)
+; SSE2: test3:
+; SSE2: movdqa
+; SSE2: psraw $15
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test3:
+; SSSE3: pabsw
+; SSSE3-NEXT: ret
+
+; AVX2: test3:
+; AVX2: vpabsw
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i16> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sgt <8 x i16> %a, zeroinitializer ; lane mask: signed %a > 0 (a zero lane takes -%a, which is still 0)
+        %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <8 x i16> %abs
+}
+
+define <16 x i8> @test4(<16 x i8> %a) nounwind { ; per-lane absolute-value idiom on 8-bit lanes: select(%a < 0, -%a, %a)
+; SSE2: test4:
+; SSE2: pxor
+; SSE2: pcmpgtb
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test4:
+; SSSE3: pabsb
+; SSSE3-NEXT: ret
+
+; AVX2: test4:
+; AVX2: vpabsb
+; AVX2-NEXT: ret
+        %tmp1neg = sub <16 x i8> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp slt <16 x i8> %a, zeroinitializer ; lane mask: signed %a < 0
+        %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a ; operands swapped vs. the sgt/sge forms: take -%a where the mask is set
+        ret <16 x i8> %abs
+}
+
+define <4 x i32> @test5(<4 x i32> %a) nounwind { ; per-lane absolute-value idiom: select(%a <= 0, -%a, %a)
+; SSE2: test5:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test5:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test5:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sle <4 x i32> %a, zeroinitializer ; lane mask: signed %a <= 0
+        %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a ; take -%a where the mask is set, otherwise keep %a
+        ret <4 x i32> %abs
+}
+
+define <8 x i32> @test6(<8 x i32> %a) nounwind { ; 256-bit variant of the abs idiom: select(%a > -1, %a, -%a)
+; SSSE3: test6:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test6:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> ; lane mask: signed %a > -1
+        %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <8 x i32> %abs
+}
+
+define <8 x i32> @test7(<8 x i32> %a) nounwind { ; 256-bit variant of the abs idiom: select(%a >= 0, %a, -%a)
+; SSSE3: test7:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test7:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sge <8 x i32> %a, zeroinitializer ; lane mask: signed %a >= 0
+        %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <8 x i32> %abs
+}
+
+define <16 x i16> @test8(<16 x i16> %a) nounwind { ; 256-bit variant of the abs idiom on 16-bit lanes: select(%a > 0, %a, -%a)
+; SSSE3: test8:
+; SSSE3: pabsw
+; SSSE3: pabsw
+; SSSE3-NEXT: ret
+
+; AVX2: test8:
+; AVX2: vpabsw {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <16 x i16> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sgt <16 x i16> %a, zeroinitializer ; lane mask: signed %a > 0 (a zero lane takes -%a, which is still 0)
+        %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg ; keep %a where the mask is set, otherwise take -%a
+        ret <16 x i16> %abs
+}
+
+define <32 x i8> @test9(<32 x i8> %a) nounwind { ; 256-bit variant of the abs idiom on 8-bit lanes: select(%a < 0, -%a, %a)
+; SSSE3: test9:
+; SSSE3: pabsb
+; SSSE3: pabsb
+; SSSE3-NEXT: ret
+
+; AVX2: test9:
+; AVX2: vpabsb {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <32 x i8> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp slt <32 x i8> %a, zeroinitializer ; lane mask: signed %a < 0
+        %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a ; take -%a where the mask is set, otherwise keep %a
+        ret <32 x i8> %abs
+}
+
+define <8 x i32> @test10(<8 x i32> %a) nounwind { ; 256-bit variant of the abs idiom: select(%a <= 0, -%a, %a)
+; SSSE3: test10:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test10:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a ; 0 - %a, the negated input
+        %b = icmp sle <8 x i32> %a, zeroinitializer ; lane mask: signed %a <= 0
+        %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a ; take -%a where the mask is set, otherwise keep %a
+        ret <8 x i32> %abs
+}
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 52b987e..2bfe5fb 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; We specify -mcpu explicitly to prevent the instruction reordering that happens
+; on some setups (e.g., Atom) from affecting the output.
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
 
@@ -117,11 +119,8 @@ entry:
 ; WIN32:      movl %eax, (%e{{[sc][px]}})
 
 ; The this pointer goes to ECX.
-; FIXME: for some reason, the below checks fail on the Ubuntu Atom D2700 bot.
-; FIXME-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; FIXME-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
-
-; WIN32:      calll "?foo@C5@@QAE?AUS5@@XZ"
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
 ; WIN32:      ret
   ret void
 }
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
index 57163d3..7d36a7a 100644
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: stack_end_address
+; CHECK: {{movq.+rbp.*$}}
+; CHECK: {{movq.+rbp.*$}}
+; CHECK: ret
 
 define i64* @stack_end_address() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index ba93378..1b0ddc6 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep "callq	f" %t1
-; RUN: not grep "callq	f@PLT" %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s
+; The call to f must be emitted directly and must never go through the PLT.
+; The positive pattern is anchored at end of line so it cannot match the prefix of "callq f@PLT".
+; CHECK-NOT: {{callq	f@PLT}}
+; CHECK: {{callq	f$}}
+; CHECK-NOT: {{callq	f@PLT}}
 
 define void @g() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index cbf6588..75f8902 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s | grep movswl
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: movswl
 
 target datalayout = "e-p:64:64"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index 4f1dde3..168b898 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -6,7 +6,7 @@ entry:
   br i1 undef, label %return, label %if.end.i
 
 if.end.i:                                         ; preds = %entry
-  %tmp7.i = load i32* undef, align 4, !tbaa !0
+  %tmp7.i = load i32* undef, align 4
   br i1 undef, label %return, label %if.end
 
 if.end:                                           ; preds = %if.end.i
@@ -55,7 +55,3 @@ cond.false280:                                    ; preds = %cond.true225
 return:                                           ; preds = %if.end.i, %entry
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
index ae6221a..688b88d 100644
--- a/test/CodeGen/X86/zext-inreg-0.ll
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -march=x86 | not grep and
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep and %t
-; RUN: not grep movzbq %t
-; RUN: not grep movzwq %t
-; RUN: not grep movzlq %t
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X64 %s
+
+; X86-NOT: and
+
+; X64-NOT: and
+; X64-NOT: movzbq
+; X64-NOT: movzwq
+; X64-NOT: movzlq
 
 ; These should use movzbl instead of 'and 255'.
 ; This related to not having a ZERO_EXTEND_REG opcode.