Vendor import of llvm trunk r132879:

http://llvm.org/svn/llvm-project/llvm/trunk@132879
author: dim <dim@FreeBSD.org> 2011-06-12 15:42:51 +0000
committer: dim <dim@FreeBSD.org> 2011-06-12 15:42:51 +0000
commit: ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree: b3032e51d630e8070e9e08d6641648f195316a80 /test
parent: 2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
download: FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.zip
FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.tar.gz
294 files changed, 7158 insertions, 603 deletions
diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
index 2b0cd78..7b5584e 100644
--- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {1 may alias}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {1 partial alias}
 ; PR7959
 
 target datalayout = "e-p:32:32:32"
diff --git a/test/Analysis/BasicAA/dag.ll b/test/Analysis/BasicAA/dag.ll
new file mode 100644
index 0000000..501f4c3
--- /dev/null
+++ b/test/Analysis/BasicAA/dag.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; BasicAA's guard against use-def cycles shouldn't prevent it from
+; analyzing use-def dags.
+
+; CHECK: MustAlias:  i8* %base, i8* %phi
+; CHECK: MustAlias: i8* %phi, i8* %wwa
+; CHECK: MustAlias: i8* %phi, i8* %wwb
+; CHECK: MustAlias: i16* %bigbase, i8* %phi
+define i8 @foo(i8* %base, i1 %x, i1 %w) {
+entry:
+  br i1 %w, label %wa, label %wb
+wa:
+  %wwa = bitcast i8* %base to i8*
+  br label %wc
+wb:
+  %wwb = bitcast i8* %base to i8*
+  br label %wc
+wc:
+  %first = phi i8* [ %wwa, %wa ], [ %wwb, %wb ]
+  %fc = bitcast i8* %first to i8*
+  br i1 %x, label %xa, label %xb
+xa:
+  %xxa = bitcast i8* %fc to i8*
+  br label %xc
+xb:
+  %xxb = bitcast i8* %fc to i8*
+  br label %xc
+xc:
+  %phi = phi i8* [ %xxa, %xa ], [ %xxb, %xb ]
+
+  store i8 0, i8* %phi
+
+  %bigbase = bitcast i8* %base to i16*
+  store i16 -1, i16* %bigbase
+
+  %loaded = load i8* %phi
+  ret i8 %loaded
+}
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index ec0c8a7..7a71e3e 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -102,7 +102,7 @@ define i32 @test4(i8* %P) {
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
 ; CHECK: @test4
-; CHECK: load i32* @G
+; CHECK-NOT: load
 ; CHECK: memset.p0i8.i32
 ; CHECK-NOT: load
 ; CHECK: ret i32 0
@@ -117,7 +117,7 @@ define i32 @test5(i8* %P, i32 %Len) {
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
 ; CHECK: @test5
-; CHECK: load i32* @G
+; CHECK-NOT: load
 ; CHECK: memcpy.p0i8.p0i8.i32
 ; CHECK-NOT: load
 ; CHECK: ret i32 0
diff --git a/test/Analysis/BasicAA/must-and-partial.ll b/test/Analysis/BasicAA/must-and-partial.ll
new file mode 100644
index 0000000..93b6184
--- /dev/null
+++ b/test/Analysis/BasicAA/must-and-partial.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
+
+; When merging MustAlias and PartialAlias, merge to PartialAlias
+; instead of MayAlias.
+
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK: PartialAlias:  i16* %bigbase0, i8* %phi
+define i8 @test0(i8* %base, i1 %x) {
+entry:
+  %baseplusone = getelementptr i8* %base, i64 1
+  br i1 %x, label %red, label %green
+red:
+  br label %green
+green:
+  %phi = phi i8* [ %baseplusone, %red ], [ %base, %entry ]
+  store i8 0, i8* %phi
+
+  %bigbase0 = bitcast i8* %base to i16*
+  store i16 -1, i16* %bigbase0
+
+  %loaded = load i8* %phi
+  ret i8 %loaded
+}
+
+; CHECK: PartialAlias:  i16* %bigbase1, i8* %sel
+define i8 @test1(i8* %base, i1 %x) {
+entry:
+  %baseplusone = getelementptr i8* %base, i64 1
+  %sel = select i1 %x, i8* %baseplusone, i8* %base
+  store i8 0, i8* %sel
+
+  %bigbase1 = bitcast i8* %base to i16*
+  store i16 -1, i16* %bigbase1
+
+  %loaded = load i8* %sel
+  ret i8 %loaded
+}
diff --git a/test/Analysis/BasicAA/underlying-value.ll b/test/Analysis/BasicAA/underlying-value.ll
new file mode 100644
index 0000000..0671c82
--- /dev/null
+++ b/test/Analysis/BasicAA/underlying-value.ll
@@ -0,0 +1,25 @@
+; RUN: opt -basicaa -licm -S < %s
+; PR9931
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @func_20() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond2, %entry
+  br i1 undef, label %for.cond2, label %for.end22
+
+for.cond2:                                        ; preds = %for.body5, %for.cond
+  br i1 false, label %for.body5, label %for.cond
+
+for.body5:                                        ; preds = %for.cond2
+  %arrayidx = getelementptr inbounds [2 x i64]* undef, i32 0, i64 0
+  %tmp7 = load i64* %arrayidx, align 8
+  %arrayidx9 = getelementptr inbounds [2 x i64]* undef, i32 0, i64 undef
+  %tmp10 = load i64* %arrayidx9, align 8
+  br label %for.cond2
+
+for.end22:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
new file mode 100644
index 0000000..52e394b
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -0,0 +1,131 @@
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
+; PR9971
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.X = type { i32, float }
+%union.vector_t = type { [2 x i64] }
+
+; Don't delete the load after the loop, because it loads values stored
+; inside the loop.
+
+; CHECK: define void @vrlh(
+
+; CHECK: for.end:
+; CHECK:   %arrayidx31 = getelementptr inbounds %union.vector_t* %t, i64 0, i32 0, i64 1
+; CHECK:   %tmp32 = load i64* %arrayidx31, align 8, !tbaa !3
+
+define void @vrlh(%union.vector_t* %va, %union.vector_t* %vb, %union.vector_t* %vd) nounwind {
+entry:
+  %t = alloca %union.vector_t, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sub = sub nsw i32 7, %i.01
+  %idxprom = sext i32 %sub to i64
+  %half = bitcast %union.vector_t* %vb to [8 x i16]*
+  %arrayidx = getelementptr inbounds [8 x i16]* %half, i64 0, i64 %idxprom
+  %tmp4 = load i16* %arrayidx, align 2, !tbaa !0
+  %conv = zext i16 %tmp4 to i32
+  %and = and i32 %conv, 15
+  %sub6 = sub nsw i32 7, %i.01
+  %idxprom7 = sext i32 %sub6 to i64
+  %half9 = bitcast %union.vector_t* %va to [8 x i16]*
+  %arrayidx10 = getelementptr inbounds [8 x i16]* %half9, i64 0, i64 %idxprom7
+  %tmp11 = load i16* %arrayidx10, align 2, !tbaa !0
+  %conv12 = zext i16 %tmp11 to i32
+  %shl = shl i32 %conv12, %and
+  %sub15 = sub nsw i32 7, %i.01
+  %idxprom16 = sext i32 %sub15 to i64
+  %half18 = bitcast %union.vector_t* %va to [8 x i16]*
+  %arrayidx19 = getelementptr inbounds [8 x i16]* %half18, i64 0, i64 %idxprom16
+  %tmp20 = load i16* %arrayidx19, align 2, !tbaa !0
+  %conv21 = zext i16 %tmp20 to i32
+  %sub23 = sub nsw i32 16, %and
+  %shr = lshr i32 %conv21, %sub23
+  %or = or i32 %shl, %shr
+  %conv24 = trunc i32 %or to i16
+  %sub26 = sub nsw i32 7, %i.01
+  %idxprom27 = sext i32 %sub26 to i64
+  %half28 = bitcast %union.vector_t* %t to [8 x i16]*
+  %arrayidx29 = getelementptr inbounds [8 x i16]* %half28, i64 0, i64 %idxprom27
+  store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !0
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %arrayidx31 = getelementptr inbounds %union.vector_t* %t, i64 0, i32 0, i64 1
+  %tmp32 = load i64* %arrayidx31, align 8, !tbaa !3
+  %arrayidx35 = getelementptr inbounds %union.vector_t* %vd, i64 0, i32 0, i64 1
+  store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !3
+  %arrayidx37 = getelementptr inbounds %union.vector_t* %t, i64 0, i32 0, i64 0
+  %tmp38 = load i64* %arrayidx37, align 8, !tbaa !3
+  %arrayidx41 = getelementptr inbounds %union.vector_t* %vd, i64 0, i32 0, i64 0
+  store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !3
+  ret void
+}
+
+; Do delete the load after the loop.
+
+; CHECK: define i32 @test0(
+
+; CHECK:   ret i32 0
+
+define i32 @test0(%struct.X* %a) nounwind {
+entry:
+  %i = getelementptr inbounds %struct.X* %a, i64 0, i32 0
+  store i32 0, i32* %i, align 4, !tbaa !4
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i2.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %f = getelementptr inbounds %struct.X* %a, i64 %i2.01, i32 1
+  %tmp6 = load float* %f, align 4, !tbaa !5
+  %mul = fmul float %tmp6, 0x40019999A0000000
+  store float %mul, float* %f, align 4, !tbaa !5
+  %inc = add nsw i64 %i2.01, 1
+  %cmp = icmp slt i64 %inc, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %i9 = getelementptr inbounds %struct.X* %a, i64 0, i32 0
+  %tmp10 = load i32* %i9, align 4, !tbaa !4
+  ret i32 %tmp10
+}
+
+; Do delete the load after the loop.
+
+; CHECK: define float @test1(
+
+; CHECK:   ret float 0x3FD3333340000000
+
+define float @test1(%struct.X* %a) nounwind {
+entry:
+  %f = getelementptr inbounds %struct.X* %a, i64 0, i32 1
+  store float 0x3FD3333340000000, float* %f, align 4, !tbaa !5
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %i5 = getelementptr inbounds %struct.X* %a, i64 %i.01, i32 0
+  %tmp6 = load i32* %i5, align 4, !tbaa !4
+  %mul = mul nsw i32 %tmp6, 3
+  store i32 %mul, i32* %i5, align 4, !tbaa !4
+  %inc = add nsw i64 %i.01, 1
+  %cmp = icmp slt i64 %inc, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %f9 = getelementptr inbounds %struct.X* %a, i64 0, i32 1
+  %tmp10 = load float* %f9, align 4, !tbaa !5
+  ret float %tmp10
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"long long", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Archive/check_binary_output.ll b/test/Archive/check_binary_output.ll
new file mode 100644
index 0000000..60ab5ca
--- /dev/null
+++ b/test/Archive/check_binary_output.ll
@@ -0,0 +1,4 @@
+; This is not an assembly file, this is just to run the test.
+; The test verifies that llvm-ar produces a binary output.
+
+;RUN: llvm-ar p %p/GNU.a very_long_bytecode_file_name.bc | cmp -s %p/very_long_bytecode_file_name.bc -
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index e4e2d3a..417493f 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -10,6 +10,7 @@
 ; RUN:   not grep {llvm\\.x86\\.sse2\\.loadu}
 ; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
 declare i32 @llvm.ctpop.i28(i28 %val)
 declare i32 @llvm.cttz.i29(i29 %val)
@@ -91,3 +92,20 @@ define void @test_loadu(i8* %a, double* %b) {
   %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
   ret void
 }
+
+declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone 
+declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone 
+declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone 
+declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone 
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
+; CHECK: store{{.*}}nontemporal
+  call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
+; CHECK: store{{.*}}nontemporal
+  call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
+; CHECK: store{{.*}}nontemporal
+  call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
+; CHECK: store{{.*}}nontemporal
+  call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
+  ret void
+}
diff --git a/test/Assembler/invalid_cast.ll b/test/Assembler/invalid_cast.ll
new file mode 100644
index 0000000..c5b082b
--- /dev/null
+++ b/test/Assembler/invalid_cast.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+
+define <3 x i8> @foo(<4 x i64> %x) {
+  %y = trunc <4 x i64> %x to <3 x i8>
+  ret <3 x i8> %y
+}
diff --git a/test/Assembler/invalid_cast2.ll b/test/Assembler/invalid_cast2.ll
new file mode 100644
index 0000000..f2e7c41
--- /dev/null
+++ b/test/Assembler/invalid_cast2.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+
+define i8 @foo(<4 x i64> %x) {
+  %y = trunc <4 x i64> %x to i8
+  ret i8 %y
+}
diff --git a/test/Bitcode/2006-12-11-Cast-ConstExpr.ll b/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
index 6df8711..e704627 100644
--- a/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
+++ b/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
@@ -1,7 +1,7 @@
 ; This test ensures that we get a bitcast constant expression in and out,
 ; not a sitofp constant expression. 
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   grep {bitcast (}
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; CHECK: bitcast (
 
 @G = external global i32
 
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll
index 8a87673..a5af2b8 100644
--- a/test/Bitcode/AutoUpgradeGlobals.ll
+++ b/test/Bitcode/AutoUpgradeGlobals.ll
@@ -1,3 +1,4 @@
 ; This isn't really an assembly file. It just runs test on bitcode to ensure
 ; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | not grep {i32 @\\.llvm\\.eh}
+; RUN: llvm-dis < %s.bc | FileCheck %s 
+; CHECK-NOT: {i32 @\\.llvm\\.eh}
diff --git a/test/Bitcode/AutoUpgradeIntrinsics.ll b/test/Bitcode/AutoUpgradeIntrinsics.ll
index 5f9bcd5..c3e2e9e 100644
--- a/test/Bitcode/AutoUpgradeIntrinsics.ll
+++ b/test/Bitcode/AutoUpgradeIntrinsics.ll
@@ -1,10 +1,8 @@
 ; This isn't really an assembly file. It just runs test on bitcode to ensure
 ; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.ct}
-; RUN: llvm-dis < %s.bc | \
-; RUN:   not grep {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-dis < %s.bc | \
-; RUN:   not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-dis < %s.bc | \
-; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.ct}
+; CHECK-NOT: {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
+; CHECK-NOT: {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
+; CHECK-NOT: {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
 
diff --git a/test/Bitcode/blockaddress.ll b/test/Bitcode/blockaddress.ll
new file mode 100644
index 0000000..b9f3341
--- /dev/null
+++ b/test/Bitcode/blockaddress.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; PR9857
+
+define void @f(i8** nocapture %ptr1) {
+; CHECK: define void @f
+entry:
+  br label %here.i
+
+here.i:
+  store i8* blockaddress(@doit, %here), i8** %ptr1, align 8
+; CHECK: blockaddress(@doit, %here)
+  br label %doit.exit
+
+doit.exit:
+  ret void
+}
+
+define void @doit(i8** nocapture %pptr) {
+; CHECK: define void @doit
+entry:
+  br label %here
+
+here:
+  store i8* blockaddress(@doit, %here), i8** %pptr, align 8
+; CHECK: blockaddress(@doit, %here)
+  br label %end
+
+end:
+  ret void
+}
diff --git a/test/Bitcode/sse2_loadl_pd.ll b/test/Bitcode/sse2_loadl_pd.ll
index b0bea16..6cb0da5 100644
--- a/test/Bitcode/sse2_loadl_pd.ll
+++ b/test/Bitcode/sse2_loadl_pd.ll
@@ -1,2 +1,3 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.loadl.pd}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.loadl.pd} 
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movl_dq.ll b/test/Bitcode/sse2_movl_dq.ll
index 093d821..2fc0149 100644
--- a/test/Bitcode/sse2_movl_dq.ll
+++ b/test/Bitcode/sse2_movl_dq.ll
@@ -1,2 +1,3 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.movl.dq}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s 
+; CHECK-NOT: {i32 @llvm\\.movl.dq}
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll
index 25a35b6..ab82c43 100644
--- a/test/Bitcode/sse2_movs_d.ll
+++ b/test/Bitcode/sse2_movs_d.ll
@@ -1,2 +1,3 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.movs.d}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.movs.d}
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_punpck_qdq.ll b/test/Bitcode/sse2_punpck_qdq.ll
index b9d711c..4c68af5 100644
--- a/test/Bitcode/sse2_punpck_qdq.ll
+++ b/test/Bitcode/sse2_punpck_qdq.ll
@@ -1,3 +1,4 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.punpckh.qdq}
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.punpckl.qdq}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.punpckh.qdq}
+; CHECK-NOT: {i32 @llvm\\.punpckl.qdq}
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll
index 5829edb..1ba6a1d 100644
--- a/test/Bitcode/sse2_shuf_pd.ll
+++ b/test/Bitcode/sse2_shuf_pd.ll
@@ -1,2 +1,3 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.shuf.pd}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.shuf.pd}
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll
index f4e5d54..99b61b6 100644
--- a/test/Bitcode/sse2_unpck_pd.ll
+++ b/test/Bitcode/sse2_unpck_pd.ll
@@ -1,3 +1,4 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckh.pd}
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckl.pd}
-; RUN: llvm-dis < %s.bc | grep shufflevector
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.unpckh.pd}
+; CHECK-NOT: {i32 @llvm\\.unpckl.pd}
+; CHECK: shufflevector
diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll
index 6872cc0..752786d 100644
--- a/test/Bitcode/sse41_pmulld.ll
+++ b/test/Bitcode/sse41_pmulld.ll
@@ -1,2 +1,3 @@
-; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld}
-; RUN: llvm-dis < %s.bc | grep mul
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; CHECK-NOT: {i32 @llvm\\.pmulld}
+; CHECK: mul
diff --git a/test/Bitcode/sse42_crc32.ll b/test/Bitcode/sse42_crc32.ll
new file mode 100644
index 0000000..1c371c3
--- /dev/null
+++ b/test/Bitcode/sse42_crc32.ll
@@ -0,0 +1,28 @@
+; Check to make sure old CRC32 intrinsics are auto-upgraded
+; correctly.
+;
+; Rdar: 9472944
+;
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; crc32.8 should upgrade to crc32.32.8
+; CHECK: i32 @llvm.x86.sse42.crc32.32.8(
+; CHECK-NOT: i32 @llvm.x86.sse42.crc32.8(
+
+; crc32.16 should upgrade to crc32.32.16
+; CHECK: i32 @llvm.x86.sse42.crc32.32.16(
+; CHECK-NOT: i32 @llvm.x86.sse42.crc32.16(
+
+; crc32.32 should upgrade to crc32.32.32
+; CHECK: i32 @llvm.x86.sse42.crc32.32.32(
+; CHECK-NOT: i32 @llvm.x86.sse42.crc32.32(
+
+; crc64.8 should upgrade to crc32.64.8
+; CHECK: i64 @llvm.x86.sse42.crc32.64.8(
+; CHECK-NOT: i64 @llvm.x86.sse42.crc64.8(
+
+; crc64.64 should upgrade to crc32.64.64
+; CHECK: i64 @llvm.x86.sse42.crc32.64.64(
+; CHECK-NOT: i64 @llvm.x86.sse42.crc64.64(
+
+
diff --git a/test/Bitcode/sse42_crc32.ll.bc b/test/Bitcode/sse42_crc32.ll.bc
new file mode 100644
index 0000000..d895fad
--- /dev/null
+++ b/test/Bitcode/sse42_crc32.ll.bc
diff --git a/test/Bitcode/ssse3_palignr.ll b/test/Bitcode/ssse3_palignr.ll
index d596dd5..f62ca11 100644
--- a/test/Bitcode/ssse3_palignr.ll
+++ b/test/Bitcode/ssse3_palignr.ll
@@ -1 +1,2 @@
-; RUN: llvm-dis < %s.bc | not grep {@llvm\\.palign}
+; RUN: llvm-dis < %s.bc | FileCheck %s 
+; CHECK-NOT: {@llvm\\.palign}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 3909c6a..0a157c9 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -1,16 +1,16 @@
-; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; RUN: llc -O1 -march=arm -mattr=+vfp2 -mtriple=arm-linux-gnueabi < %s | FileCheck %s
 ; pr4939
 
 define void @test(double* %x, double* %y) nounwind {
-  %1 = load double* %x, align 4
-  %2 = load double* %y, align 4
+  %1 = load double* %x
+  %2 = load double* %y
   %3 = fsub double -0.000000e+00, %1
   %4 = fcmp ugt double %2, %3
   br i1 %4, label %bb1, label %bb2
 
 bb1:
 ;CHECK: vstrhi.64
-  store double %1, double* %y, align 4
+  store double %1, double* %y
   br label %bb2
 
 bb2:
diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll
index a61908f..19f756f 100644
--- a/test/CodeGen/ARM/2011-04-07-schediv.ll
+++ b/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -13,6 +13,7 @@ entry:
 ; Make sure the scheduler schedules all uses of the preincrement
 ; induction variable before defining the postincrement value.
 ; CHECK: t:
+; CHECK: %bb
 ; CHECK-NOT: mov
 bb:                                               ; preds = %entry, %bb
   %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
index a9dd971..568718c 100644
--- a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
+++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -14,15 +14,15 @@ for.cond:
   br i1 %cmp, label %for.body, label %return
 
 for.body:
-; CHECK: %for.body
-; CHECK: movs r{{[0-9]+}}, #1
+; CHECK: %for.
+; CHECK: movs r{{[0-9]+}}, #{{[01]}}
   %arrayidx = getelementptr i32* %A, i32 %0
   %tmp4 = load i32* %arrayidx, align 4
   %cmp6 = icmp eq i32 %tmp4, %value
   br i1 %cmp6, label %return, label %for.inc
 
-; CHECK: %for.cond
-; CHECK: movs r{{[0-9]+}}, #0
+; CHECK: %for.
+; CHECK: movs r{{[0-9]+}}, #{{[01]}}
 
 for.inc:
   %inc = add i32 %0, 1
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
new file mode 100644
index 0000000..0b5f962
--- /dev/null
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -verify-machineinstrs
+; <rdar://problem/9187612>
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin"
+
+define void @func() unnamed_addr align 2 {
+entry:
+  br label %for.cond
+
+for.cond:
+  %tmp2 = phi i32 [ 0, %entry ], [ %add, %for.cond.backedge ]
+  %cmp = icmp ult i32 %tmp2, 14
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %add = add i32 %tmp2, 1
+  switch i32 %tmp2, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 4, label %sw.bb
+    i32 5, label %sw.bb
+    i32 10, label %sw.bb
+  ]
+
+sw.bb:
+  invoke void @foo()
+          to label %invoke.cont17 unwind label %lpad
+
+invoke.cont17:
+  invoke void @foo()
+          to label %for.cond.backedge unwind label %lpad26
+
+for.cond.backedge:
+  br label %for.cond
+
+lpad:
+  %exn = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+lpad26:
+  %exn27 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+sw.default:
+  br label %for.cond.backedge
+
+for.end:
+  invoke void @foo()
+          to label %call8.i.i.i.noexc unwind label %lpad44
+
+call8.i.i.i.noexc:
+  ret void
+
+lpad44:
+  %exn45 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:
+  %exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+  tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
+  unreachable
+
+terminate.lpad:
+  %exn51 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  tail call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @foo()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
+
+declare void @_ZSt9terminatev()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"bool", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
new file mode 100644
index 0000000..4db3acf
--- /dev/null
+++ b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 -arm-tail-calls=1 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__collate_st_chain_pri = type { [10 x i32], [2 x i32] }
+%struct.__collate_st_char_pri = type { [2 x i32] }
+%struct.__collate_st_info = type { [2 x i8], i8, i8, [2 x i32], [2 x i32], i32, i32 }
+%struct.__collate_st_large_char_pri = type { i32, %struct.__collate_st_char_pri }
+%struct.__collate_st_subst = type { i32, [10 x i32] }
+%struct.__xlocale_st_collate = type { i32, void (i8*)*, [32 x i8], %struct.__collate_st_info, [2 x %struct.__collate_st_subst*], %struct.__collate_st_chain_pri*, %struct.__collate_st_large_char_pri*, [256 x %struct.__collate_st_char_pri] }
+%struct.__xlocale_st_messages = type { i32, void (i8*)*, i8*, %struct.lc_messages_T }
+%struct.__xlocale_st_monetary = type { i32, void (i8*)*, i8*, %struct.lc_monetary_T }
+%struct.__xlocale_st_numeric = type { i32, void (i8*)*, i8*, %struct.lc_numeric_T }
+%struct.__xlocale_st_runelocale = type { i32, void (i8*)*, [32 x i8], i32, i32, i32 (i32*, i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (%union.__mbstate_t*, %struct._xlocale*)*, i32 (i32*, i8**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32, %struct._RuneLocale }
+%struct.__xlocale_st_time = type { i32, void (i8*)*, i8*, %struct.lc_time_T }
+%struct._xlocale = type { i32, void (i8*)*, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, i32, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, %struct.__xlocale_st_collate*, %struct.__xlocale_st_runelocale*, %struct.__xlocale_st_messages*, %struct.__xlocale_st_monetary*, %struct.__xlocale_st_numeric*, %struct._xlocale*, %struct.__xlocale_st_time*, %struct.lconv }
+%struct.lc_messages_T = type { i8*, i8*, i8*, i8* }
+%struct.lc_monetary_T = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
+%struct.lc_numeric_T = type { i8*, i8*, i8* }
+%struct.lc_time_T = type { [12 x i8*], [12 x i8*], [7 x i8*], [7 x i8*], i8*, i8*, i8*, i8*, i8*, i8*, [12 x i8*], i8*, i8* }
+%struct.lconv = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+%union.__mbstate_t = type { i64, [120 x i8] }
+
+@"\01_fnmatch.initial" = external constant %union.__mbstate_t, align 4
+
+; CHECK: _fnmatch
+; CHECK: blx _fnmatch1
+
+define i32 @"\01_fnmatch"(i8* %pattern, i8* %string, i32 %flags) nounwind optsize {
+entry:
+  %call4 = tail call i32 @fnmatch1(i8* %pattern, i8* %string, i8* %string, i32 %flags, %union.__mbstate_t* byval @"\01_fnmatch.initial", %union.__mbstate_t* byval @"\01_fnmatch.initial", %struct._xlocale* undef, i32 64) optsize
+  ret i32 %call4
+}
+
+declare i32 @fnmatch1(i8*, i8*, i8*, i32, %union.__mbstate_t* byval, %union.__mbstate_t* byval, %struct._xlocale*, i32) nounwind optsize
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
new file mode 100644
index 0000000..0a7bb6c
--- /dev/null
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define i32 @foo(float %scale, float %scale2) nounwind {
+entry:
+  %scale.addr = alloca float, align 4
+  %scale2.addr = alloca float, align 4
+  store float %scale, float* %scale.addr, align 4
+  store float %scale2, float* %scale2.addr, align 4
+  %tmp = load float* %scale.addr, align 4
+  %tmp1 = load float* %scale2.addr, align 4
+  call void asm sideeffect "vmul.f32    q0, q0, ${0:y} \0A\09vmul.f32    q1, q1, ${0:y} \0A\09vmul.f32    q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind
+  ret i32 0
+}
+
+define void @f0() nounwind {
+entry:
+; CHECK: f0
+; CHECK: .word -1
+call void asm sideeffect ".word ${0:B} \0A\09", "i"(i32 0) nounwind
+ret void
+}
+
+define void @f1() nounwind {
+entry:
+; CHECK: f1
+; CHECK: .word 65535
+call void asm sideeffect ".word ${0:L} \0A\09", "i"(i32 -1) nounwind
+ret void
+}
+
+@f2_ptr = internal global i32* @f2_var, align 4
+@f2_var = external global i32
+
+define void @f2() nounwind {
+entry:
+; CHECK: f2
+; CHECK: ldr r0, [r{{[0-9]+}}]
+call void asm sideeffect "ldr r0, [${0:m}]\0A\09", "*m,~{r0}"(i32** @f2_ptr) nounwind
+ret void
+}
+
+@f3_ptr = internal global i64* @f3_var, align 4
+@f3_var = external global i64
+@f3_var2 = external global i64
+
+define void @f3() nounwind {
+entry:
+; CHECK: f3
+; CHECK: stm r{{[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
+; CHECK: adds lr, [[REG1]]
+; CHECK: ldm r{{[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
+%tmp = load i64* @f3_var, align 4
+%tmp1 = load i64* @f3_var2, align 4
+%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind
+store i64 %0, i64* @f3_var, align 4
+%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind
+store i64 %1, i64* @f3_var, align 4
+ret void
+}
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
new file mode 100644
index 0000000..03940e3
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+define void @func(i32 %argc, i8** %argv) nounwind {
+entry:
+	%argc.addr = alloca i32		; <i32*> [#uses=1]
+	%argv.addr = alloca i8**		; <i8***> [#uses=1]
+	%val1 = alloca i32		; <i32*> [#uses=2]
+	%val2 = alloca i32		; <i32*> [#uses=15]
+	%andt = alloca i32		; <i32*> [#uses=2]
+	%ort = alloca i32		; <i32*> [#uses=2]
+	%xort = alloca i32		; <i32*> [#uses=2]
+	%old = alloca i32		; <i32*> [#uses=18]
+	%temp = alloca i32		; <i32*> [#uses=2]
+	store i32 %argc, i32* %argc.addr
+	store i8** %argv, i8*** %argv.addr
+	store i32 0, i32* %val1
+	store i32 31, i32* %val2
+	store i32 3855, i32* %andt
+	store i32 3855, i32* %ort
+	store i32 3855, i32* %xort
+	store i32 4, i32* %temp
+	%tmp = load i32* %temp
+  ; CHECK: ldrex
+  ; CHECK: add
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp )		; <i32>:0 [#uses=1]
+	store i32 %0, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: sub
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 )		; <i32>:1 [#uses=1]
+	store i32 %1, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: add
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 )		; <i32>:2 [#uses=1]
+	store i32 %2, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: sub
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 )		; <i32>:3 [#uses=1]
+	store i32 %3, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: and
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 )		; <i32>:4 [#uses=1]
+	store i32 %4, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: or
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 )		; <i32>:5 [#uses=1]
+	store i32 %5, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: eor
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 )		; <i32>:6 [#uses=1]
+	store i32 %6, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* %old
+	%neg = sub i32 0, 1		; <i32> [#uses=1]
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg )		; <i32>:8 [#uses=1]
+	store i32 %8, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 )		; <i32>:10 [#uses=1]
+	store i32 %10, i32* %old
+	ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.min.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.max.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umax.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umin.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind 
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 946db19..c94b096 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -31,8 +31,7 @@ define i32 @f3(i32 %A, i32 %B) nounwind {
 entry:
 ; CHECK: f3
 ; CHECK: lsr{{.*}} #7
-; CHECK: mov r0, r1
-; CHECK: bfi r0, r2, #7, #16
+; CHECK: bfi {{.*}}, #7, #16
   %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
   %and2 = and i32 %B, -8388481                    ; <i32> [#uses=1]
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
@@ -42,8 +41,8 @@ entry:
 ; rdar://8752056
 define i32 @f4(i32 %a) nounwind {
 ; CHECK: f4
-; CHECK: movw r1, #3137
-; CHECK: bfi r1, r0, #15, #5
+; CHECK: movw [[R1:r[0-9]+]], #3137
+; CHECK: bfi [[R1]], {{r[0-9]+}}, #15, #5
   %1 = shl i32 %a, 15
   %ins7 = and i32 %1, 1015808
   %ins12 = or i32 %ins7, 3137
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 4dc37aa..c460f7a 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
 
@@ -16,6 +16,10 @@ define void @t1() {
 define void @t2() {
 ; CHECKV6: t2:
 ; CHECKV6: bx r0 @ TAILCALL
+; CHECKT2D: t2:
+; CHECKT2D: ldr
+; CHECKT2D-NEXT: ldr
+; CHECKT2D-NEXT: bx r0 @ TAILCALL
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
@@ -26,6 +30,9 @@ define void @t3() {
 ; CHECKV6: b _t2  @ TAILCALL
 ; CHECKELF: t3:
 ; CHECKELF: b t2(PLT) @ TAILCALL
+; CHECKT2D: t3:
+; CHECKT2D: b.w _t2  @ TAILCALL
+
         tail call void @t2( )            ; <i32> [#uses=0]
         ret void
 }
@@ -71,10 +78,10 @@ declare void @foo() nounwind
 
 define void @t7() nounwind {
 entry:
-; CHECKT2: t7:
-; CHECKT2: blxeq _foo
-; CHECKT2-NEXT: pop.w
-; CHECKT2-NEXT: b _foo
+; CHECKT2D: t7:
+; CHECKT2D: blxeq _foo
+; CHECKT2D-NEXT: pop.w
+; CHECKT2D-NEXT: b.w _foo
   br i1 undef, label %bb, label %bb1.lr.ph
 
 bb1.lr.ph:
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
new file mode 100644
index 0000000..9bdae43
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+;CHECK: 	vadd.f32	q4, q8, q8
+;CHECK-NEXT: Ltmp
+;CHECK-NEXT: 	@DEBUG_VALUE: y <- Q4+0
+;CHECK-NEXT:    @DEBUG_VALUE: x <- Q4+0
+
+
+@.str = external constant [13 x i8]
+
+declare <4 x float> @test0001(float) nounwind readnone ssp
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9, %entry
+  %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
+  tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
+  %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
+  tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
+  br i1 undef, label %for.end54, label %for.body9, !dbg !44
+
+for.end54:                                        ; preds = %for.body9
+  %tmp115 = extractelement <4 x float> %add19, i32 1
+  %conv6.i75 = fpext float %tmp115 to double, !dbg !45
+  %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+  %tmp116 = extractelement <4 x float> %add20, i32 1
+  %conv6.i76 = fpext float %tmp116 to double, !dbg !45
+  %call.i83 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
+  ret i32 0, !dbg !49
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !10, !14}
+!llvm.dbg.lv.test0001 = !{!18}
+!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29}
+!llvm.dbg.lv.printFV = !{!30}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{null}
+!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!34 = metadata !{metadata !35, metadata !37}
+!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = metadata !{i32 79, i32 7, metadata !40, null}
+!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!44 = metadata !{i32 75, i32 5, metadata !42, null}
+!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
+!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!48 = metadata !{i32 95, i32 3, metadata !25, null}
+!49 = metadata !{i32 99, i32 3, metadata !25, null}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
new file mode 100644
index 0000000..16aeab3
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s - | FileCheck %s
+; Radar 9376013
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp1
+;CHECK-NEXT:        .long   Ltmp3
+;CHECK-NEXT: Lset9 = Ltmp10-Ltmp9                    @ Loc expr size
+;CHECK-NEXT:        .short  Lset9
+;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
+
+define void @_Z3foov() optsize ssp {
+entry:
+  %call = tail call float @_Z3barv() optsize, !dbg !11
+  tail call void @llvm.dbg.value(metadata !{float %call}, i64 0, metadata !5), !dbg !11
+  %call16 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp7 = fcmp olt float %call, %call16, !dbg !12
+  br i1 %cmp7, label %for.body, label %for.end, !dbg !12
+
+for.body:                                         ; preds = %entry, %for.body
+  %k.08 = phi float [ %inc, %for.body ], [ %call, %entry ]
+  %call4 = tail call float @_Z2f3f(float %k.08) optsize, !dbg !13
+  %inc = fadd float %k.08, 1.000000e+00, !dbg !14
+  %call1 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp = fcmp olt float %inc, %call1, !dbg !12
+  br i1 %cmp, label %for.body, label %for.end, !dbg !12
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !15
+}
+
+declare float @_Z3barv() optsize
+
+declare float @_Z2f2v() optsize
+
+declare float @_Z2f3f(float) optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1}
+!llvm.dbg.lv._Z3foov = !{!5, !8}
+
+!0 = metadata !{i32 589841, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 590080, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 589835, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 590080, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 589835, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 589835, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 6, i32 18, metadata !6, null}
+!12 = metadata !{i32 7, i32 3, metadata !6, null}
+!13 = metadata !{i32 8, i32 20, metadata !9, null}
+!14 = metadata !{i32 7, i32 20, metadata !10, null}
+!15 = metadata !{i32 10, i32 1, metadata !6, null}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
deleted file mode 100644
index 34313aa..0000000
--- a/test/CodeGen/ARM/divmod.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
-
-define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
-entry:
-; CHECK: foo:
-; CHECK: bl ___divmodsi4
-; CHECK-NOT: bl ___divmodsi4
-  %div = sdiv i32 %x, %y
-  store i32 %div, i32* %P, align 4
-  %rem = srem i32 %x, %y
-  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
-  store i32 %rem, i32* %arrayidx6, align 4
-  ret void
-}
-
-define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
-entry:
-; CHECK: bar:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: bl ___udivmodsi4
-  %div = udiv i32 %x, %y
-  store i32 %div, i32* %P, align 4
-  %rem = urem i32 %x, %y
-  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
-  store i32 %rem, i32* %arrayidx6, align 4
-  ret void
-}
-
-; rdar://9280991
-@flags = external unnamed_addr global i32
-@tabsize = external unnamed_addr global i32
-
-define void @do_indent(i32 %cols) nounwind {
-entry:
-; CHECK: do_indent:
-  %0 = load i32* @flags, align 4
-  %1 = and i32 %0, 67108864
-  %2 = icmp eq i32 %1, 0
-  br i1 %2, label %bb1, label %bb
-
-bb:
-; CHECK: bl ___divmodsi4
-  %3 = load i32* @tabsize, align 4
-  %4 = srem i32 %cols, %3
-  %5 = sdiv i32 %cols, %3
-  %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false)
-  %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind
-  br label %bb1
-
-bb1:
-  %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ]
-  %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0
-  store i8 0, i8* %8, align 1
-  ret void
-}
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
-declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
new file mode 100644
index 0000000..e475508
--- /dev/null
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+target triple = "armv6-apple-macosx10.6"
+
+declare void @func()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @llvm.eh.resume(i8*, i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+define void @test0() {
+entry:
+  invoke void @func()
+    to label %cont unwind label %lpad
+
+cont:
+  ret void
+
+lpad:
+  %exn = call i8* @llvm.eh.exception()
+  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+  call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn
+  unreachable
+}
+
+; CHECK: __Unwind_SjLj_Resume
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
new file mode 100644
index 0000000..aa06299
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; rdar://9515076
+; (Make sure this doesn't crash.)
+
+define i32 @test(i32 %i) {
+  %t = trunc i32 %i to i4
+  %r = sext i4 %t to i32
+  ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 6aad92f..499c97f 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -61,3 +61,100 @@ b3:
 ; THUMB: orr {{.*}} #4
 ; ARM: orr {{.*}} #4
 }
+
+define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind {
+; THUMB: test3:
+; ARM: test3:
+
+bb1:
+  %a1 = trunc i32 %tmp to i16
+  %a2 = trunc i16 %a1 to i8
+  %a3 = trunc i8 %a2 to i1
+  %a4 = zext i1 %a3 to i8
+  store i8 %a4, i8* %ptr3
+  %a5 = zext i8 %a4 to i16
+  store i16 %a5, i16* %ptr2
+  %a6 = zext i16 %a5 to i32
+  store i32 %a6, i32* %ptr1
+  br label %bb2
+
+; THUMB: and
+; THUMB: strb
+; THUMB: uxtb
+; THUMB: strh
+; THUMB: uxth
+; ARM: and
+; ARM: strb
+; ARM: uxtb
+; ARM: strh
+; ARM: uxth
+
+bb2:
+  %b1 = trunc i32 %tmp to i16
+  %b2 = trunc i16 %b1 to i8
+  store i8 %b2, i8* %ptr3
+  %b3 = sext i8 %b2 to i16
+  store i16 %b3, i16* %ptr2
+  %b4 = sext i16 %b3 to i32
+  store i32 %b4, i32* %ptr1
+  br label %bb3
+
+; THUMB: strb
+; THUMB: sxtb
+; THUMB: strh
+; THUMB: sxth
+; ARM: strb
+; ARM: sxtb
+; ARM: strh
+; ARM: sxth
+
+bb3:
+  %c1 = load i8* %ptr3
+  %c2 = load i16* %ptr2
+  %c3 = load i32* %ptr1
+  %c4 = zext i8 %c1 to i32
+  %c5 = sext i16 %c2 to i32
+  %c6 = add i32 %c4, %c5
+  %c7 = sub i32 %c3, %c6
+  store i32 %c7, i32* %ptr1
+  ret void
+
+; THUMB: ldrb
+; THUMB: ldrh
+; THUMB: uxtb
+; THUMB: sxth
+; THUMB: add
+; THUMB: sub
+; ARM: ldrb
+; ARM: ldrh
+; ARM: uxtb
+; ARM: sxth
+; ARM: add
+; ARM: sub
+}
+
+; Check loads/stores with globals
+@test4g = external global i32
+
+define void @test4() {
+  %a = load i32* @test4g
+  %b = add i32 %a, 1
+  store i32 %b, i32* @test4g
+  ret void
+
+; THUMB: ldr.n r0, LCPI4_1
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r0, [r0]
+; THUMB: adds r0, #1
+; THUMB: ldr.n r1, LCPI4_0
+; THUMB: ldr r1, [r1]
+; THUMB: str r0, [r1]
+
+; ARM: ldr r0, LCPI4_1
+; ARM: ldr r0, [r0]
+; ARM: ldr r0, [r0]
+; ARM: add r0, r0, #1
+; ARM: ldr r1, LCPI4_0
+; ARM: ldr r1, [r1]
+; ARM: str r0, [r1]
+}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index f241c26..c4dbeb9 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -10,7 +10,7 @@ entry:
 
 ; HARD: test1:
 ; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
-; HARD: vbsl [[REG1]], d2, d0
+; HARD: vbsl [[REG1]], d
   %0 = tail call float @copysignf(float %x, float %y) nounwind
   ret float %0
 }
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 9facf20..6081712 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -29,7 +29,7 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t2:
-; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}}
+; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
 ; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 9d6eba8..58687b9 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -23,3 +23,38 @@ entry:
   %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind
   ret void
 }
+
+; Radar 9306086
+
+%0 = type { <8 x i8>, <16 x i8>* }
+
+define hidden void @conv4_8_E() nounwind {
+entry:
+%asmtmp31 = call %0 asm "vld1.u8  {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
+unreachable
+}
+
+; Radar 9037836 & 9119939
+
+define i32 @t3() nounwind {
+entry:
+tail call void asm sideeffect "flds s15, $0 \0A", "^Uv|m,~{s15}"(float 1.000000e+00) nounwind
+ret i32 0
+}
+
+; Radar 9037836 & 9119939
+
+@k.2126 = internal unnamed_addr global float 1.000000e+00
+define i32 @t4() nounwind {
+entry:
+call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"(float* @k.2126) nounwind
+ret i32 0
+}
+
+; Radar 9037836 & 9119939
+
+define i32 @t5() nounwind {
+entry:
+call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind
+ret i32 0
+}
diff --git a/test/CodeGen/ARM/intrinsics.ll b/test/CodeGen/ARM/intrinsics.ll
new file mode 100644
index 0000000..54cc3e0
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=thumb -mtriple=thumbv7-eabi -mcpu=cortex-a8 | FileCheck %s
+
+define void @coproc() nounwind {
+entry:
+  ; CHECK: mrc
+  %0 = tail call i32 @llvm.arm.mrc(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcr
+  tail call void @llvm.arm.mcr(i32 7, i32 1, i32 %0, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mrc2
+  %1 = tail call i32 @llvm.arm.mrc2(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcr2
+  tail call void @llvm.arm.mcr2(i32 7, i32 1, i32 %1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcrr
+  tail call void @llvm.arm.mcrr(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind
+  ; CHECK: mcrr2
+  tail call void @llvm.arm.mcrr2(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind
+  ; CHECK: cdp
+  tail call void @llvm.arm.cdp(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind
+  ; CHECK: cdp2
+  tail call void @llvm.arm.cdp2(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind
+  ret void
+}
+
+declare void @llvm.arm.cdp2(i32, i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.cdp(i32, i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcrr2(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcrr(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcr2(i32, i32, i32, i32, i32, i32) nounwind
+
+declare i32 @llvm.arm.mrc2(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcr(i32, i32, i32, i32, i32, i32) nounwind
+
+declare i32 @llvm.arm.mrc(i32, i32, i32, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
index 2d016f6..1c69e15 100644
--- a/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -10,8 +10,8 @@ entry:
   br i1 %0, label %return, label %bb
 
 bb:
-; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
-; CHECK: str [[REGISTER]], [r2], #4
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [{{r[0-9]+}}], {{r[0-9]+}}
+; CHECK: str [[REGISTER]], [{{r[0-9]+}}], #4
   %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
   %tmp = mul i32 %j.05, %index
   %uglygep = getelementptr i8* %src6, i32 %tmp
diff --git a/test/CodeGen/ARM/ldstrexd.ll b/test/CodeGen/ARM/ldstrexd.ll
new file mode 100644
index 0000000..0c0911a
--- /dev/null
+++ b/test/CodeGen/ARM/ldstrexd.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+%0 = type { i32, i32 }
+
+; CHECK: f0:
+; CHECK: ldrexd
+define i64 @f0(i8* %p) nounwind readonly {
+entry:
+  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
+  %0 = extractvalue %0 %ldrexd, 1
+  %1 = extractvalue %0 %ldrexd, 0
+  %2 = zext i32 %0 to i64
+  %3 = zext i32 %1 to i64
+  %shl = shl nuw i64 %2, 32
+  %4 = or i64 %shl, %3
+  ret i64 %4
+}
+
+; CHECK: f1:
+; CHECK: strexd
+define i32 @f1(i8* %ptr, i64 %val) nounwind {
+entry:
+  %tmp4 = trunc i64 %val to i32
+  %tmp6 = lshr i64 %val, 32
+  %tmp7 = trunc i64 %tmp6 to i32
+  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
+  ret i32 %strexd
+}
+
+declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
+declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
+
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
new file mode 100644
index 0000000..e3e6eae
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -0,0 +1,80 @@
+; RUN: llc -regalloc=greedy < %s | FileCheck %s
+
+; LSR shouldn't introduce more induction variables than needed, increasing
+; register pressure and therefore spilling. There is more room for improvement
+; here.
+
+; CHECK: sub sp, #{{32|24}}
+
+; CHECK:      ldr r{{.*}}, [sp, #4]
+; CHECK-NEXT: ldr r{{.*}}, [sp, #16]
+; CHECK-NEXT: ldr r{{.*}}, [sp, #12]
+; CHECK-NEXT: adds
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.7.0"
+
+%struct.partition_entry = type { i32, i32, i64, i64 }
+
+define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp {
+entry:
+  %cmp79 = icmp sgt i32 %num_entries, 0
+  br i1 %cmp79, label %outer.loop, label %for.end72
+
+outer.loop:                                 ; preds = %for.inc69, %entry
+  %overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]
+  %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
+  %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
+  %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
+  %tmp5 = load i64* %offset, align 4, !tbaa !0
+  %tmp15 = load i64* %len, align 4, !tbaa !0
+  %add = add nsw i64 %tmp15, %tmp5
+  br label %inner.loop
+
+inner.loop:                                       ; preds = %for.inc, %outer.loop
+  %overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ]
+  %1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ]
+  %cmp23 = icmp eq i32 %0, %1
+  br i1 %cmp23, label %for.inc, label %if.end
+
+if.end:                                           ; preds = %inner.loop
+  %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
+  %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
+  %tmp29 = load i64* %offset28, align 4, !tbaa !0
+  %tmp40 = load i64* %len39, align 4, !tbaa !0
+  %add41 = add nsw i64 %tmp40, %tmp29
+  %cmp44 = icmp sge i64 %tmp29, %tmp5
+  %cmp47 = icmp slt i64 %tmp29, %add
+  %or.cond = and i1 %cmp44, %cmp47
+  %overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178
+  %cmp52 = icmp sle i64 %add41, %add
+  %cmp56 = icmp sgt i64 %add41, %tmp5
+  %or.cond74 = and i1 %cmp52, %cmp56
+  %overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2
+  %cmp61 = icmp sgt i64 %tmp29, %tmp5
+  %cmp65 = icmp slt i64 %add41, %add
+  %or.cond75 = or i1 %cmp61, %cmp65
+  br i1 %or.cond75, label %for.inc, label %if.then66
+
+if.then66:                                        ; preds = %if.end
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end, %if.then66, %inner.loop
+  %overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ]
+  %inc = add nsw i32 %1, 1
+  %exitcond = icmp eq i32 %inc, %num_entries
+  br i1 %exitcond, label %for.inc69, label %inner.loop
+
+for.inc69:                                        ; preds = %for.inc
+  %inc71 = add nsw i32 %0, 1
+  %exitcond83 = icmp eq i32 %inc71, %num_entries
+  br i1 %exitcond83, label %for.end72, label %outer.loop
+
+for.end72:                                        ; preds = %for.inc69, %entry
+  %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
+  ret i32 %overlap.0.lcssa
+}
+
+!0 = metadata !{metadata !"long long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 41d5944..032129d 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,10 +1,26 @@
-; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -o - | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
+
+@from = common global [500 x i32] zeroinitializer, align 4
+@to = common global [500 x i32] zeroinitializer, align 4
 
 define void @f() {
 entry:
-        call void @llvm.memmove.i32( i8* null, i8* null, i32 64, i32 0 )
-        call void @llvm.memcpy.i32( i8* null, i8* null, i32 64, i32 0 )
-        call void @llvm.memset.i32( i8* null, i8 64, i32 0, i32 0 )
+
+        ; CHECK: memmove
+        ; EABI: __aeabi_memmove
+        call void @llvm.memmove.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+
+        ; CHECK: memcpy
+        ; EABI: __aeabi_memcpy
+        call void @llvm.memcpy.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+
+        ; EABI memset swaps arguments
+        ; CHECK: mov r1, #0
+        ; CHECK: memset
+        ; EABI: mov r2, #0
+        ; EABI: __aeabi_memset
+        call void @llvm.memset.i32( i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0 )
         unreachable
 }
 
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index 886ff3f..991d728 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -1,20 +1,39 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "armv7-eabi"
+; RUN: llc < %s -mtriple=armv7-eabi      | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic            | FileCheck %s -check-prefix=IOS-PIC
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static         | FileCheck %s -check-prefix=IOS-STATIC
 
-@foo = common global i32 0                        ; <i32*> [#uses=1]
+@foo = common global i32 0
 
-define arm_aapcs_vfpcc i32* @bar1() nounwind readnone {
+define i32* @bar1() nounwind readnone {
 entry:
-; CHECK:      movw    r0, :lower16:foo
-; CHECK-NEXT: movt    r0, :upper16:foo
+; EABI:      movw    r0, :lower16:foo
+; EABI-NEXT: movt    r0, :upper16:foo
+
+; IOS:      movw    r0, :lower16:L_foo$non_lazy_ptr
+; IOS-NEXT: movt    r0, :upper16:L_foo$non_lazy_ptr
+
+; IOS-PIC:      movw    r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8))
+; IOS-PIC-NEXT: movt    r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8))
+
+; IOS-STATIC-NOT:      movw    r0, :lower16:_foo
+; IOS-STATIC-NOT:       movt    r0, :upper16:_foo
   ret i32* @foo
 }
 
-define arm_aapcs_vfpcc void @bar2(i32 %baz) nounwind {
+define void @bar2(i32 %baz) nounwind {
 entry:
-; CHECK:      movw    r1, :lower16:foo
-; CHECK-NEXT: movt    r1, :upper16:foo
+; EABI:      movw    r1, :lower16:foo
+; EABI-NEXT: movt    r1, :upper16:foo
+
+; IOS:      movw    r1, :lower16:L_foo$non_lazy_ptr
+; IOS-NEXT: movt    r1, :upper16:L_foo$non_lazy_ptr
+
+; IOS-PIC:      movw    r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8))
+; IOS-PIC-NEXT: movt    r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8))
+
+; IOS-STATIC-NOT:      movw    r1, :lower16:_foo
+; IOS-STATIC-NOT:      movt    r1, :upper16:_foo
   store i32 %baz, i32* @foo, align 4
   ret void
 }
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 82ed018..43f8a66 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -5,8 +5,8 @@
 define i32 @t1(i32 %c) nounwind readnone {
 entry:
 ; ARM: t1:
-; ARM: mov r1, #101
-; ARM: orr r1, r1, #1, #24
+; ARM: mov [[R1:r[0-9]+]], #101
+; ARM: orr [[R1b:r[0-9]+]], [[R1]], #1, #24
 ; ARM: movgt r0, #123
 
 ; ARMT2: t1:
@@ -34,7 +34,7 @@ entry:
 ; ARMT2: movwgt r0, #357
 
 ; THUMB2: t2:
-; THUMB2: mov.w r0, #123
+; THUMB2: mov{{(s|\.w)}} r0, #123
 ; THUMB2: movwgt r0, #357
 
   %0 = icmp sgt i32 %c, 1
@@ -53,7 +53,7 @@ entry:
 ; ARMT2: moveq r0, #1
 
 ; THUMB2: t3:
-; THUMB2: mov.w r0, #0
+; THUMB2: mov{{(s|\.w)}} r0, #0
 ; THUMB2: moveq r0, #1
   %0 = icmp eq i32 %a, 160
   %1 = zext i1 %0 to i32
@@ -67,11 +67,11 @@ entry:
 ; ARM: movlt
 
 ; ARMT2: t4:
-; ARMT2: movwlt r0, #65365
-; ARMT2: movtlt r0, #65365
+; ARMT2: movwlt [[R0:r[0-9]+]], #65365
+; ARMT2: movtlt [[R0]], #65365
 
 ; THUMB2: t4:
-; THUMB2: mvnlt.w r0, #11141290
+; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290
   %0 = icmp slt i32 %a, %b
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 2f5fadb..82dc14d 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: push
-; CHECK: stmib
+; CHECK: stm
 	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind		; <i32> [#uses=0]
 	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 805aad5..0d7d4ec 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -125,7 +125,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;Check for a post-increment updating load.
 define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 ;CHECK: vld2lanei32_update:
-;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
+;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -153,7 +153,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld2laneQi16:
 ;Check the (default) alignment.
-;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
+;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
@@ -166,7 +166,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
+;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -222,7 +222,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld3lanei16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
-;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -265,7 +265,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld3laneQi16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
@@ -280,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK: vld3laneQi16_update:
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
@@ -344,7 +344,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32]
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
@@ -360,7 +360,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;Check for a post-increment updating load.
 define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8_update:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -380,7 +380,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld4lanei16:
 ;Check that a power-of-two alignment smaller than the total size of the memory
 ;being loaded is ignored.
-;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
+;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
@@ -398,7 +398,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld4lanei32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
@@ -431,7 +431,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
@@ -448,7 +448,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld4laneQi32:
 ;Check the (default) alignment.
-;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index f0f9e4e..34acd16 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -147,3 +147,34 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
   store <4 x float> %tmp8, <4 x float>* %v, align 16
   ret void
 }
+
+; vrev <4 x i16> should use VREV32 and not VREV64
+define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
+; CHECK: test_vrev64:
+; CHECK: vext.16
+; CHECK: vrev32.16
+entry:
+  %0 = bitcast <4 x i16>* %source to <8 x i16>*
+  %tmp2 = load <8 x i16>* %0, align 4
+  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
+  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
+  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
+  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
+  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
+  ret void
+}
+
+; Test vrev of float4
+define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
+; CHECK: float_vrev64
+; CHECK: vext.32
+; CHECK: vrev64.32
+entry:
+  %0 = bitcast float* %source to <4 x float>*
+  %tmp2 = load <4 x float>* %0, align 4
+  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
+  %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
+  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index d1bc15a..08b7232 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -54,7 +54,8 @@ define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst1laneQi8:
-;CHECK: vst1.8 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.8 {d17[1]}, [r0]
 	%tmp1 = load <16 x i8>* %B
         %tmp2 = extractelement <16 x i8> %tmp1, i32 9
         store i8 %tmp2, i8* %A, align 8
@@ -72,7 +73,8 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32:
-;CHECK: vst1.32 {d17[1]}, [r0, :32]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0, :32]
 	%tmp1 = load <4 x i32>* %B
         %tmp2 = extractelement <4 x i32> %tmp1, i32 3
         store i32 %tmp2, i32* %A, align 8
@@ -82,7 +84,8 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;Check for a post-increment updating store.
 define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32_update:
-;CHECK: vst1.32 {d17[1]}, [r1, :32]!
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]!
 	%A = load i32** %ptr
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = extractelement <4 x i32> %tmp1, i32 3
@@ -94,7 +97,8 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 
 define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vst1laneQf:
-;CHECK: vst1.32 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0]
 	%tmp1 = load <4 x float>* %B
         %tmp2 = extractelement <4 x float> %tmp1, i32 3
         store float %tmp2, float* %A
diff --git a/test/CodeGen/Generic/promote-integers.ll b/test/CodeGen/Generic/promote-integers.ll
new file mode 100644
index 0000000..5812592
--- /dev/null
+++ b/test/CodeGen/Generic/promote-integers.ll
@@ -0,0 +1,15 @@
+; Test that vectors are scalarized/lowered correctly.
+; RUN: llc -march=x86 -promote-elements < %s | FileCheck %s
+
+; This test is the poster-child for integer-element-promotion.
+; Until this feature is complete, we mark this test as expected to fail.
+; XFAIL: *
+; CHECK: vector_code
+; CHECK: ret
+define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 )  {
+   %C = icmp eq <4 x i64> %A, %B
+   %K = xor <4 x i1> <i1 1, i1 1, i1 1, i1 1>, %C
+   %D = select <4 x i1> %K, <4 x float> %R1, <4 x float> %R0
+   ret <4 x float> %D
+}
+
diff --git a/test/CodeGen/Generic/zero-sized-array.ll b/test/CodeGen/Generic/zero-sized-array.ll
new file mode 100644
index 0000000..280ba00
--- /dev/null
+++ b/test/CodeGen/Generic/zero-sized-array.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s
+; PR9900
+
+%zero = type [0 x i8]
+%foobar = type { i32, %zero }
+
+define void @f(%foobar %arg) {
+  %arg1 = extractvalue %foobar %arg, 0
+  %arg2 = extractvalue %foobar %arg, 1
+  call i32 @f2(%zero %arg2, i32 5, i32 42)
+  ret void
+}
+
+define i32 @f2(%zero %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @f3(%zero %x, i32 %y) {
+  call i32 @f2(%zero %x, i32 5, i32 %y)
+  ret void
+}
+
+define void @f4(%zero %z) {
+  insertvalue %foobar undef, %zero %z, 1
+  ret void
+}
+
+define void @f5(%foobar %x) {
+allocas:
+  %y = extractvalue %foobar %x, 1
+  br  label %b1
+
+b1:
+  %insert120 = insertvalue %foobar undef, %zero %y, 1
+  ret void
+}
+
+define void @f6(%zero %x, %zero %y) {
+b1:
+  br i1 undef, label %end, label %b2
+
+b2:
+  br label %end
+
+end:
+  %z = phi %zero [ %y, %b1 ], [ %x, %b2 ]
+  call void @f4(%zero %z)
+  ret void
+}
+
+%zero2 = type {}
+
+define i32 @g1(%zero2 %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @g2(%zero2 %x, i32 %y) {
+  call i32 @g1(%zero2 %x, i32 5, i32 %y)
+  ret void
+}
+
+%zero2r = type {%zero2}
+
+define i32 @h1(%zero2r %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @h2(%zero2r %x, i32 %y) {
+  call i32 @h1(%zero2r %x, i32 5, i32 %y)
+  ret void
+}
+
+%foobar2 = type { i32, %zero2r }
+
+define void @h3(%foobar2 %arg) {
+  %arg1 = extractvalue %foobar2 %arg, 0
+  %arg2 = extractvalue %foobar2 %arg, 1
+  %arg21 = extractvalue %zero2r %arg2, 0
+  call void @g2(%zero2 %arg21, i32 5)
+  ret void
+}
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 0f5fc12..45342e2 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -10,7 +10,7 @@ define i8 @mov(i8 %a, i8 %b) nounwind {
 
 define i8 @add(i8 %a, i8 %b) nounwind {
 ; CHECK: add:
-; CHECK: add.b	r12, r15
+; CHECK: add.b
 	%1 = add i8 %a, %b
 	ret i8 %1
 }
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index 47382f9..f152acc 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -2,6 +2,10 @@
 ; RUN: grep abs.s  %t | count 1
 ; RUN: grep neg.s %t | count 1
 
+; FIXME: Should not emit abs.s or neg.s since these instructions produce
+;        incorrect results if the operand is NaN.
+; REQUIRES: disabled
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
 
diff --git a/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll b/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll
new file mode 100644
index 0000000..1255949
--- /dev/null
+++ b/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -verify-coalescing
+; PR10046
+;
+; PHI elimination splits the critical edge from %while.end415 to %if.end427.
+; This requires updating the BNE-J terminators to a BEQ. The BNE instruction
+; kills a virtual register, and LiveVariables must be updated with the new kill
+; instruction.
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-ellcc-linux"
+
+define i32 @mergesort(i8* %base, i32 %nmemb, i32 %size, i32 (i8*, i8*)* nocapture %cmp) nounwind {
+entry:
+  br i1 undef, label %return, label %if.end13
+
+if.end13:                                         ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %if.end13
+  %list1.0482 = phi i8* [ %base, %if.end13 ], [ null, %while.body ]
+  br i1 undef, label %while.end415, label %while.body
+
+while.end415:                                     ; preds = %while.body
+  br i1 undef, label %if.then419, label %if.end427
+
+if.then419:                                       ; preds = %while.end415
+  %call425 = tail call i8* @memmove(i8* %list1.0482, i8* undef, i32 undef) nounwind
+  br label %if.end427
+
+if.end427:                                        ; preds = %if.then419, %while.end415
+  %list2.1 = phi i8* [ undef, %if.then419 ], [ %list1.0482, %while.end415 ]
+  tail call void @free(i8* %list2.1)
+  unreachable
+
+return:                                           ; preds = %entry
+  ret i32 -1
+}
+
+
+declare i8* @memmove(i8*, i8*, i32)
+
+declare void @free(i8*)
+
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
new file mode 100644
index 0000000..50eeecf
--- /dev/null
+++ b/test/CodeGen/Mips/alloca.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+define i32 @twoalloca(i32 %size) nounwind {
+entry:
+; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: addu  $sp, $zero, $[[T0]]
+; CHECK: addu  $[[SP1:[0-9]+]], $zero, $sp
+; CHECK: subu  $[[T1:[0-9]+]], $sp, $[[SZ]]
+; CHECK: addu  $sp, $zero, $[[T1]]
+; CHECK: addu  $[[SP2:[0-9]+]], $zero, $sp
+; CHECK: lw  $25, %call16(foo)($gp)
+; CHECK: addiu $4, $[[SP1]], 24
+; CHECK: jalr  $25
+; CHECK: lw  $25, %call16(foo)($gp)
+; CHECK: addiu $4, $[[SP2]], 24
+; CHECK: jalr  $25
+  %tmp1 = alloca i8, i32 %size, align 4
+  %add.ptr = getelementptr inbounds i8* %tmp1, i32 5
+  store i8 97, i8* %add.ptr, align 1
+  %tmp4 = alloca i8, i32 %size, align 4
+  call void @foo2(double 1.000000e+00, double 2.000000e+00, i32 3) nounwind
+  %call = call i32 @foo(i8* %tmp1) nounwind
+  %call7 = call i32 @foo(i8* %tmp4) nounwind
+  %add = add nsw i32 %call7, %call
+  ret i32 %add
+}
+
+declare void @foo2(double, double, i32)
+
+declare i32 @foo(i8*)
+
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
new file mode 100644
index 0000000..2d5555b
--- /dev/null
+++ b/test/CodeGen/Mips/atomic.ll
@@ -0,0 +1,253 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
+
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.swap.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
+
+
+@x = common global i32 0, align 4
+
+define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* @x, i32 %incr)
+  ret i32 %0
+
+; CHECK:   AtomicLoadAdd32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* @x, i32 %incr)
+  ret i32 %0
+
+; CHECK:   AtomicLoadNand32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   and     $[[R1]], $[[R1]], $4
+; CHECK:   nor     $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicSwap32(i32 %oldval) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval)
+  ret i32 %0
+
+; CHECK:   AtomicSwap32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   sw      $4, [[OFFSET:[0-9]+]]($sp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   lw      $[[R2:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R3:[0-9]+]], $zero, $[[R2]]
+; CHECK:   sc      $[[R3]], 0($[[R0]])
+; CHECK:   beq     $[[R3]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval)
+  ret i32 %0
+
+; CHECK:   AtomicCmpSwap32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   sw      $5, [[OFFSET:[0-9]+]]($sp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $2, 0($[[R0]])
+; CHECK:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; CHECK:   lw      $[[R1:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+; CHECK:   $[[BB1]]:
+}
+
+
+
+@y = common global i8 0, align 1
+
+define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK:   AtomicLoadAdd8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK:   AtomicLoadSub8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   subu    $[[R18:[0-9]+]], $zero, $4
+; CHECK:   andi    $[[R8:[0-9]+]], $[[R18]], 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK:   AtomicLoadNand8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval)
+  ret i8 %0
+
+; CHECK:   AtomicSwap8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK:   sw      $[[R9]], [[OFFSET:[0-9]+]]($sp)
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   lw      $[[R18:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @y, i8 %oldval, i8 %newval)
+  ret i8 %0
+
+; CHECK:   AtomicCmpSwap8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK:   andi    $[[R10:[0-9]+]], $5, 255
+; CHECK:   sll     $[[R11:[0-9]+]], $[[R10]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R12:[0-9]+]], 0($[[R2]])
+; CHECK:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
+; CHECK:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
+
+; CHECK:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
+; CHECK:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
+; CHECK:   sc      $[[R15]], 0($[[R2]])
+; CHECK:   beq     $[[R15]], $zero, $[[BB0]]
+
+; CHECK:   $[[BB1]]:
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R13]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index e9af304..6de6b77 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -8,14 +8,14 @@ entry:
   ret i8* %x
 }
 
-; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp1)($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp1)
-; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp2)($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp2)
-; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp1)
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp1)
-; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp2)
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp2)
+; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
+; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 8329c89..ec37961 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -4,8 +4,8 @@
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
 ; CHECK:  addiu ${{[0-9]+}}, $gp, %got(i1)
+; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -14,3 +14,19 @@ entry:
   ret i32* %cond
 }
 
+@c = global i32 1, align 4
+@d = global i32 0, align 4
+
+; CHECK: cmov2:
+; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; CHECK: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+define i32 @cmov2(i32 %s) nounwind readonly {
+entry:
+  %tobool = icmp ne i32 %s, 0
+  %tmp1 = load i32* @c, align 4
+  %tmp2 = load i32* @d, align 4
+  %cond = select i1 %tobool, i32 %tmp1, i32 %tmp2
+  ret i32 %cond
+}
+
diff --git a/test/CodeGen/Mips/double2int.ll b/test/CodeGen/Mips/double2int.ll
new file mode 100644
index 0000000..3d033e1
--- /dev/null
+++ b/test/CodeGen/Mips/double2int.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+define i32 @f1(double %d) nounwind readnone {
+entry:
+; CHECK: trunc.w.d $f{{[0-9]+}}, $f12
+  %conv = fptosi double %d to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
new file mode 100644
index 0000000..765b778
--- /dev/null
+++ b/test/CodeGen/Mips/eh.ll
@@ -0,0 +1,78 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EL
+; RUN: llc  < %s -march=mips   -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EB
+
+@g1 = global double 0.000000e+00, align 8
+@_ZTId = external constant i8*
+
+define void @_Z1fd(double %i2) {
+entry:
+; CHECK-EL:  addiu $sp, $sp
+; CHECK-EL:  .cfi_def_cfa_offset
+; CHECK-EL:  sdc1 $f20
+; CHECK-EL:  sw  $ra
+; CHECK-EL:  sw  $17
+; CHECK-EL:  sw  $16
+; CHECK-EL:  .cfi_offset 52, -8
+; CHECK-EL:  .cfi_offset 53, -4
+; CHECK-EB:  .cfi_offset 53, -8
+; CHECK-EB:  .cfi_offset 52, -4
+; CHECK-EL:  .cfi_offset 31, -12
+; CHECK-EL:  .cfi_offset 17, -16
+; CHECK-EL:  .cfi_offset 16, -20
+; CHECK-EL:  .cprestore 
+
+  %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
+  %0 = bitcast i8* %exception to double*
+  store double 3.200000e+00, double* %0, align 8, !tbaa !0
+  invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTId to i8*), i8* null) noreturn
+          to label %unreachable unwind label %lpad
+
+lpad:                                             ; preds = %entry
+; CHECK-EL:  # %lpad
+; CHECK-EL:  lw  $gp
+; CHECK-EL:  beq $5
+
+  %exn = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTId to i8*)) nounwind
+  %1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTId to i8*)) nounwind
+  %2 = icmp eq i32 %eh.selector, %1
+  br i1 %2, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  %3 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind
+  %4 = bitcast i8* %3 to double*
+  %exn.scalar = load double* %4, align 8
+  %add = fadd double %exn.scalar, %i2
+  store double %add, double* @g1, align 8, !tbaa !0
+  tail call void @__cxa_end_catch() nounwind
+  ret void
+
+eh.resume:                                        ; preds = %lpad
+  tail call void @llvm.eh.resume(i8* %exn, i32 %eh.selector) noreturn
+  unreachable
+
+unreachable:                                      ; preds = %entry
+  unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
+
+declare void @llvm.eh.resume(i8*, i32)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
new file mode 100644
index 0000000..14c6507
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -0,0 +1,55 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EL
+; RUN: llc  < %s -march=mips -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EB
+
+define double @func0(double %d0, double %d1) nounwind readnone {
+entry:
+; CHECK-EL: func0:
+; CHECK-EL: lui $[[T0:[0-9]+]], 32767
+; CHECK-EL: lui $[[T1:[0-9]+]], 32768
+; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13
+; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15
+; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
+; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
+; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
+; CHECK-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; CHECK-EL: mtc1 $[[LO0]], $f0
+; CHECK-EL: mtc1 $[[OR]], $f1
+;
+; CHECK-EB: lui $[[T0:[0-9]+]], 32767
+; CHECK-EB: lui $[[T1:[0-9]+]], 32768
+; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
+; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
+; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
+; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
+; CHECK-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
+; CHECK-EB: mtc1 $[[OR]], $f0
+; CHECK-EB: mtc1 $[[LO0]], $f1
+  %call = tail call double @copysign(double %d0, double %d1) nounwind readnone
+  ret double %call
+}
+
+declare double @copysign(double, double) nounwind readnone
+
+define float @func1(float %f0, float %f1) nounwind readnone {
+entry:
+; CHECK-EL: func1:
+; CHECK-EL: lui $[[T0:[0-9]+]], 32767
+; CHECK-EL: lui $[[T1:[0-9]+]], 32768
+; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
+; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
+; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
+; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
+; CHECK-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; CHECK-EL: mtc1 $[[T4]], $f0
+  %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
+  ret float %call
+}
+
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll
new file mode 100644
index 0000000..c48ce7e
--- /dev/null
+++ b/test/CodeGen/Mips/frame-address.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+define i8* @f() nounwind {
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; CHECK:   addu    $fp, $sp, $zero
+; CHECK:   addu    $2, $zero, $fp
+}
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
new file mode 100644
index 0000000..ee7e131
--- /dev/null
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+@p = external global i32
+@q = external global i32
+@r = external global i32
+
+define void @f0() nounwind {
+entry:
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+  tail call void (...)* @f1() nounwind
+  %tmp = load i32* @p, align 4
+  tail call void @f2(i32 %tmp) nounwind
+  %tmp1 = load i32* @q, align 4
+  %tmp2 = load i32* @r, align 4
+  tail call void @f3(i32 %tmp1, i32 %tmp2) nounwind
+  ret void
+}
+
+declare void @f1(...)
+
+declare void @f2(i32)
+
+declare void @f3(i32, i32)
+
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
new file mode 100644
index 0000000..9a30453
--- /dev/null
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
+entry:
+; CHECK: addu $[[R1:[0-9]+]], $zero, $5
+; CHECK: addu $[[R0:[0-9]+]], $zero, $4
+; CHECK: lw  $25, %call16(ff1)
+; CHECK: ori $6, ${{[0-9]+}}, 3855
+; CHECK: ori $7, ${{[0-9]+}}, 22136
+; CHECK: jalr
+  tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
+; CHECK: lw $25, %call16(ff2)
+; CHECK: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 84($sp)
+; CHECK: addu $4, $zero, $[[R2]]
+; CHECK: addu $5, $zero, $[[R3]]
+; CHECK: jalr $25
+  tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
+  %sub = add nsw i32 %i, -1
+; CHECK: sw $[[R0]], 24($sp)
+; CHECK: sw $[[R1]], 28($sp)
+; CHECK: lw $25, %call16(ff3)
+; CHECK: addu $6, $zero, $[[R2]]
+; CHECK: addu $7, $zero, $[[R3]]
+; CHECK: jalr $25
+  tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind
+  ret void
+}
+
+declare void @ff1(i32, i64)
+
+declare void @ff2(i64, double)
+
+declare void @ff3(i32, i64, i32, i64)
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index fdfa01a..50d0993 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc  < %s -march=mips | FileCheck %s
+; RUN: llc  < %s -march=mipsel -mcpu=4ke  | FileCheck %s
 
 @caller.sf1 = internal unnamed_addr global void (...)* null, align 4
 @gf1 = external global void (...)*
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
new file mode 100644
index 0000000..fd7ae9e
--- /dev/null
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+%struct.S1 = type { [65536 x i8] }
+
+@s1 = external global %struct.S1
+
+define void @f() nounwind {
+entry:
+; CHECK:  lui $at, 65534
+; CHECK:  addu  $at, $sp, $at
+; CHECK:  addiu $sp, $at, -16
+; CHECK:  .cprestore  65536
+
+  %agg.tmp = alloca %struct.S1, align 1
+  %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
+  call void @f2(%struct.S1* byval %agg.tmp) nounwind
+  ret void
+}
+
+declare void @f2(%struct.S1* byval)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
new file mode 100644
index 0000000..b78c393
--- /dev/null
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -0,0 +1,127 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+%0 = type { i8, i16, i32, i64, double, i32, [4 x i8] }
+%struct.S1 = type { i8, i16, i32, i64, double, i32 }
+%struct.S2 = type { [4 x i32] }
+%struct.S3 = type { i8 }
+
+@f1.s1 = internal unnamed_addr constant %0 { i8 1, i16 2, i32 3, i64 4, double 5.000000e+00, i32 6, [4 x i8] undef }, align 8
+@f1.s2 = internal unnamed_addr constant %struct.S2 { [4 x i32] [i32 7, i32 8, i32 9, i32 10] }, align 4
+
+define void @f1() nounwind {
+entry:
+; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)($gp)
+; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
+; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
+; CHECK: sw  $[[R2]], 16($sp)
+; CHECK: sw  $[[R7]], 20($sp)
+; CHECK: sw  $[[R3]], 24($sp)
+; CHECK: sw  $[[R4]], 28($sp)
+; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: sw  $[[R6]], 36($sp)
+; CHECK: lw  $6, 0($[[R0]])
+; CHECK: lw  $7, 4($[[R0]])
+  %agg.tmp10 = alloca %struct.S3, align 4
+  call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
+  call void @callee2(%struct.S2* byval @f1.s2) nounwind
+  %tmp11 = getelementptr inbounds %struct.S3* %agg.tmp10, i32 0, i32 0
+  store i8 11, i8* %tmp11, align 4
+  call void @callee3(float 2.100000e+01, %struct.S3* byval %agg.tmp10, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
+  ret void
+}
+
+declare void @callee1(float, %struct.S1* byval)
+
+declare void @callee2(%struct.S2* byval)
+
+declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
+
+define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
+; CHECK: lw  $[[R2:[0-9]+]], 68($sp)
+; CHECK: lh  $[[R1:[0-9]+]], 66($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 64($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
+; CHECK: lw  $4, 88($sp)
+; CHECK: sw  $[[R3]], 16($sp)
+; CHECK: sw  $[[R4]], 20($sp)
+; CHECK: sw  $[[R2]], 24($sp)
+; CHECK: sw  $[[R1]], 28($sp)
+; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: mfc1 $6, $f[[F0]]
+
+  %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
+  %tmp = load i32* %i2, align 4, !tbaa !0
+  %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4
+  %tmp1 = load double* %d, align 8, !tbaa !3
+  %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3
+  %tmp2 = load i64* %ll, align 8, !tbaa !4
+  %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
+  %tmp3 = load i32* %i, align 4, !tbaa !0
+  %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1
+  %tmp4 = load i16* %s, align 2, !tbaa !5
+  %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0
+  %tmp5 = load i8* %c, align 1, !tbaa !1
+  tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind
+  ret void
+}
+
+declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
+
+define void @f3(%struct.S2* nocapture byval %s2) nounwind {
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $4, 56($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
+; CHECK: lw  $4, 56($sp)
+; CHECK: sw  $[[R0]], 24($sp)
+
+  %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
+  %tmp = load i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3
+  %tmp3 = load i32* %arrayidx2, align 4, !tbaa !0
+  tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind
+  ret void
+}
+
+define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: lw  $[[R1:[0-9]+]], 88($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 60($sp)
+; CHECK: lw  $4, 68($sp)
+; CHECK: sw  $[[R1]], 24($sp)
+; CHECK: sw  $[[R0]], 32($sp)
+
+  %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
+  %tmp = load i32* %i, align 4, !tbaa !0
+  %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
+  %tmp1 = load i32* %i2, align 4, !tbaa !0
+  %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0
+  %tmp2 = load i8* %c, align 1, !tbaa !1
+  tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !"long long", metadata !1}
+!5 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 1f71ed2..14ce04b 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,5 +1,4 @@
 ; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
-; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
 
 
 ; All test functions do the same thing - they return the first variable
@@ -30,11 +29,11 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va1:
-; CHECK: addiu   $sp, $sp, -32
-; CHECK: sw      $7, 44($sp)
-; CHECK: sw      $6, 40($sp)
-; CHECK: sw      $5, 36($sp)
-; CHECK: lw      $2, 36($sp)
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: sw      $5, 20($sp)
+; CHECK: lw      $2, 20($sp)
 }
 
 ; check whether the variable double argument will be accessed from the 8-byte
@@ -56,11 +55,11 @@ entry:
   ret double %tmp
 
 ; CHECK: va2:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: sw      $6, 48($sp)
-; CHECK: sw      $5, 44($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 44
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: sw      $5, 20($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 20
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -84,10 +83,10 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va3:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: sw      $6, 48($sp)
-; CHECK: lw      $2, 48($sp)
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: lw      $2, 24($sp)
 }
 
 ; double
@@ -107,14 +106,11 @@ entry:
   ret double %tmp
 
 ; CHECK: va4:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: sw      $7, 60($sp)
-; CHECK: sw      $6, 56($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 56
-; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
-; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
-; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
-; CHECK: ldc1    $f0, 0($[[R3]])
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: sw      $6, 32($sp)
+; CHECK: addiu   ${{[0-9]+}}, $sp, 32
+; CHECK: ldc1    $f0, 32($sp)
 }
 
 ; int
@@ -138,9 +134,9 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va5:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: lw      $2, 52($sp)
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: lw      $2, 36($sp)
 }
 
 ; double
@@ -164,9 +160,9 @@ entry:
   ret double %tmp
 
 ; CHECK: va6:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: sw      $7, 60($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 60
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 36
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -192,8 +188,8 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va7:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: lw      $2, 56($sp)
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: lw      $2, 40($sp)
 }
 
 ; double
@@ -215,12 +211,9 @@ entry:
   ret double %tmp
 
 ; CHECK: va8:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 64
-; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
-; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
-; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
-; CHECK: ldc1    $f0, 0($[[R3]])
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: addiu   ${{[0-9]+}}, $sp, 48
+; CHECK: ldc1    $f0, 48($sp)
 }
 
 ; int
@@ -244,8 +237,8 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va9:
-; CHECK: addiu   $sp, $sp, -56
-; CHECK: lw      $2, 76($sp)
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: lw      $2, 52($sp)
 }
 
 ; double
@@ -269,8 +262,8 @@ entry:
   ret double %tmp
 
 ; CHECK: va10:
-; CHECK: addiu   $sp, $sp, -56
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 76
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 52
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
new file mode 100644
index 0000000..034738b
--- /dev/null
+++ b/test/CodeGen/Mips/tls.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=mipsel -mcpu=mips2 -relocation-model=static < %s \
+; RUN:                             | FileCheck %s -check-prefix=STATIC
+
+
+@t1 = thread_local global i32 0, align 4
+
+define i32 @f1() nounwind {
+entry:
+  %tmp = load i32* @t1, align 4
+  ret i32 %tmp
+
+; CHECK: f1:
+
+; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
+; PIC:   addiu   $4, $gp, %tlsgd(t1)
+; PIC:   jalr    $25
+; PIC:   lw      $2, 0($2)
+
+; STATIC:   rdhwr   $3, $29
+; STATIC:   lui     $[[R0:[0-9]+]], %tprel_hi(t1)
+; STATIC:   addiu   $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
+; STATIC:   addu    $[[R2:[0-9]+]], $3, $[[R1]]
+; STATIC:   lw      $2, 0($[[R2]])
+}
+
+
+@t2 = external thread_local global i32
+
+define i32 @f2() nounwind {
+entry:
+  %tmp = load i32* @t2, align 4
+  ret i32 %tmp
+
+; CHECK: f2:
+
+; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
+; PIC:   addiu   $4, $gp, %tlsgd(t2)
+; PIC:   jalr    $25
+; PIC:   lw      $2, 0($2)
+
+; STATIC:   rdhwr   $3, $29
+; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
+; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
+; STATIC:   lw      $2, 0($[[R1]])
+}
diff --git a/test/CodeGen/Mips/weak.ll b/test/CodeGen/Mips/weak.ll
new file mode 100644
index 0000000..09dd2a4
--- /dev/null
+++ b/test/CodeGen/Mips/weak.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+@t = common global i32 (...)* null, align 4
+
+define void @f() nounwind {
+entry:
+  store i32 (...)* @test_weak, i32 (...)** @t, align 4
+  ret void
+}
+
+; CHECK: .weak test_weak
+declare extern_weak i32 @test_weak(...)
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
new file mode 100644
index 0000000..2f793de
--- /dev/null
+++ b/test/CodeGen/PTX/cvt.ll
@@ -0,0 +1,234 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+; preds 
+; (note: we convert back to i32 to return)
+
+define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
+; CHECK: cvt.pred.u16 p0, rh1;
+; CHECK: ret;
+	%a = trunc i16 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
+; CHECK: cvt.pred.u32 p0, r1;
+; CHECK: ret;
+	%a = trunc i32 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
+; CHECK: cvt.pred.u64 p0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
+; CHECK: cvt.rni.pred.f32 p0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
+; CHECK: cvt.rni.pred.f64 p0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+; i16
+
+define ptx_device i16 @cvt_i16_preds(i1 %x) {
+; CHECK: cvt.u16.pred rh0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_i32(i32 %x) {
+; CHECK: cvt.u16.u32 rh0, r1;
+; CHECK: ret;
+	%a = trunc i32 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_i64(i64 %x) {
+; CHECK: cvt.u16.u64 rh0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_f32(float %x) {
+; CHECK: cvt.rni.u16.f32 rh0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_f64(double %x) {
+; CHECK: cvt.rni.u16.f64 rh0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i16
+	ret i16 %a
+}
+
+; i32
+
+define ptx_device i32 @cvt_i32_preds(i1 %x) {
+; CHECK: cvt.u32.pred r0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_i16(i16 %x) {
+; CHECK: cvt.u32.u16 r0, rh1;
+; CHECK: ret;
+	%a = zext i16 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_i64(i64 %x) {
+; CHECK: cvt.u32.u64 r0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_f32(float %x) {
+; CHECK: cvt.rni.u32.f32 r0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_f64(double %x) {
+; CHECK: cvt.rni.u32.f64 r0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i32
+	ret i32 %a
+}
+
+; i64
+
+define ptx_device i64 @cvt_i64_preds(i1 %x) {
+; CHECK: cvt.u64.pred rd0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_i16(i16 %x) {
+; CHECK: cvt.u64.u16 rd0, rh1;
+; CHECK: ret;
+	%a = zext i16 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_i32(i32 %x) {
+; CHECK: cvt.u64.u32 rd0, r1;
+; CHECK: ret;
+	%a = zext i32 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_f32(float %x) {
+; CHECK: cvt.rni.u64.f32 rd0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_f64(double %x) {
+; CHECK: cvt.rni.u64.f64 rd0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i64
+	ret i64 %a
+}
+
+; f32
+
+define ptx_device float @cvt_f32_preds(i1 %x) {
+; CHECK: cvt.rn.f32.pred f0, p1;
+; CHECK: ret;
+	%a = uitofp i1 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i16(i16 %x) {
+; CHECK: cvt.rn.f32.u16 f0, rh1;
+; CHECK: ret;
+	%a = uitofp i16 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i32(i32 %x) {
+; CHECK: cvt.rn.f32.u32 f0, r1;
+; CHECK: ret;
+	%a = uitofp i32 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i64(i64 %x) {
+; CHECK: cvt.rn.f32.u64 f0, rd1;
+; CHECK: ret;
+	%a = uitofp i64 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_f64(double %x) {
+; CHECK: cvt.rn.f32.f64 f0, fd1;
+; CHECK: ret;
+	%a = fptrunc double %x to float
+	ret float %a
+}
+
+; f64
+
+define ptx_device double @cvt_f64_preds(i1 %x) {
+; CHECK: cvt.rn.f64.pred fd0, p1;
+; CHECK: ret;
+	%a = uitofp i1 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i16(i16 %x) {
+; CHECK: cvt.rn.f64.u16 fd0, rh1;
+; CHECK: ret;
+	%a = uitofp i16 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i32(i32 %x) {
+; CHECK: cvt.rn.f64.u32 fd0, r1;
+; CHECK: ret;
+	%a = uitofp i32 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i64(i64 %x) {
+; CHECK: cvt.rn.f64.u64 fd0, rd1;
+; CHECK: ret;
+	%a = uitofp i64 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_f32(float %x) {
+; CHECK: cvt.f64.f32 fd0, f1;
+; CHECK: ret;
+	%a = fpext float %x to double
+	ret double %a
+}
diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll
new file mode 100644
index 0000000..22eeda3
--- /dev/null
+++ b/test/CodeGen/PTX/fneg.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x) {
+; CHECK: neg.f32 f0, f1;
+; CHECK-NEXT: ret;
+	%y = fsub float -0.000000e+00, %x
+	ret float %y
+}
+
+define ptx_device double @t1_f64(double %x) {
+; CHECK: neg.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+	%y = fsub double -0.000000e+00, %x
+	ret double %y
+}
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
new file mode 100644
index 0000000..ad7b341
--- /dev/null
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | grep -v "mad"
+
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index ac33fef..92effa6 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
 ; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
 ; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
+; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3"
 ; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
 ; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
 ; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll
new file mode 100644
index 0000000..6f1b03e
--- /dev/null
+++ b/test/CodeGen/PTX/selp.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
+; CHECK: selp.u32 r0, r1, r2, p1;
+	%a = select i1 %x, i32 %y, i32 %z
+	ret i32 %a
+}
+
+define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
+; CHECK: selp.u64 rd0, rd1, rd2, p1;
+	%a = select i1 %x, i64 %y, i64 %z
+	ret i64 %a
+}
+
+define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
+; CHECK: selp.f32 f0, f1, f2, p1;
+	%a = select i1 %x, float %y, float %z
+	ret float %a
+}
+
+define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
+; CHECK: selp.f64 fd0, fd1, fd2, p1;
+	%a = select i1 %x, double %y, double %z
+	ret double %a
+}
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index d094509..6b31397 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 -join-physregs | FileCheck %s
 ; ModuleID = 'nn.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin11.0"
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index e46e1ec..318ccb0 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -1,14 +1,12 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 6, 3}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 4, 2}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 3, 0}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {mr 5, 3}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
 
 declare void @bar(i64 %x, i64 %y)
 
+; CHECK: li 4, 2
+; CHECK: li {{[53]}}, 0
+; CHECK: li 6, 3
+; CHECK: mr {{[53]}}, {{[53]}}
+
 define void @foo() {
   call void @bar(i64 2, i64 3)
   ret void
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ac56625..29c620e 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -relocation-model=pic -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=PIC
 ; RUN: llc < %s -relocation-model=static -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic -march=ppc64 -mtriple=powerpc64-apple-darwin | FileCheck %s -check-prefix=PPC64
 
 @nextaddr = global i8* null                       ; <i8**> [#uses=2]
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
@@ -7,6 +8,7 @@
 define internal i32 @foo(i32 %i) nounwind {
 ; PIC: foo:
 ; STATIC: foo:
+; PPC64: foo:
 entry:
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
@@ -18,6 +20,8 @@ bb2:                                              ; preds = %entry, %bb3
 ; PIC-NEXT: bctr
 ; STATIC: mtctr
 ; STATIC-NEXT: bctr
+; PPC64: mtctr
+; PPC64-NEXT: bctr
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 bb3:                                              ; preds = %entry
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
index 506d3a8..5393392 100644
--- a/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -6,7 +6,6 @@ define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32
 entry:
 ;CHECK: make_foo
 ;CHECK: ld [%fp+64], {{.+}}
-;CHECK: or {{.+}}, {{.+}}, %i0
 ;CHECK: jmp %i7+12
   %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
   store i32 %a, i32* %0, align 4
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index d6ca0d7..7876557 100644
--- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -11,7 +11,7 @@
 
 define i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
 ; CHECK: t:
-; CHECK: adds r0, #8
+; CHECK: adds {{r[0-7]}}, #8
 entry:
   %val = alloca i64, align 4                      ; <i64*> [#uses=3]
   %0 = icmp eq %struct.asl_file_t* %s, null       ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
new file mode 100644
index 0000000..ed55bb5
--- /dev/null
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s
+; rdar://problem/9416774
+; ModuleID = 'reduced.ll'
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-ios"
+
+%struct.MMMMMMMMMMMM = type { [4 x %struct.RRRRRRRR] }
+%struct.RRRRRRRR = type { [78 x i32] }
+
+@kkkkkk = external constant i8*
+@__PRETTY_FUNCTION__._ZN12CLGll = private unnamed_addr constant [62 x i8] c"static void tttttttttttt::lllllllllllll(const MMMMMMMMMMMM &)\00"
+@.str = private unnamed_addr constant [75 x i8] c"\09GGGGGGGGGGGGGGGGGGGGGGG:,BE:0x%08lx,ALM:0x%08lx,LTO:0x%08lx,CBEE:0x%08lx\0A\00"
+
+define void @_ZN12CLGll(%struct.MMMMMMMMMMMM* %aidData) ssp align 2 {
+entry:
+  %aidData.addr = alloca %struct.MMMMMMMMMMMM*, align 4
+  %agg.tmp = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp4 = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp10 = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp16 = alloca %struct.RRRRRRRR, align 4
+  store %struct.MMMMMMMMMMMM* %aidData, %struct.MMMMMMMMMMMM** %aidData.addr, align 4
+  br label %do.body
+
+do.body:                                          ; preds = %entry
+  %tmp = load i8** @kkkkkk, align 4
+  %tmp1 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0
+  %tmp2 = bitcast %struct.RRRRRRRR* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.RRRRRRRR* %arrayidx to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 312, i32 4, i1 false)
+  %tmp5 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0
+  %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1
+  %tmp8 = bitcast %struct.RRRRRRRR* %agg.tmp4 to i8*
+  %tmp9 = bitcast %struct.RRRRRRRR* %arrayidx7 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp8, i8* %tmp9, i32 312, i32 4, i1 false)
+  %tmp11 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0
+  %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2
+  %tmp14 = bitcast %struct.RRRRRRRR* %agg.tmp10 to i8*
+  %tmp15 = bitcast %struct.RRRRRRRR* %arrayidx13 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp14, i8* %tmp15, i32 312, i32 4, i1 false)
+  %tmp17 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0
+  %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3
+  %tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8*
+  %tmp21 = bitcast %struct.RRRRRRRR* %arrayidx19 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp20, i8* %tmp21, i32 312, i32 4, i1 false)
+  call void (i8*, i32, i8*, i8*, ...)* @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
+  br label %do.end
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+declare void @CLLoggingLog(i8*, i32, i8*, i8*, ...)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 3594424..9aee910 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -12,7 +12,7 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
-; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
+; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
 ; CHECK-NEXT: ldmia.w sp!,
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 41f7f29..47d7a9c 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -7,8 +7,8 @@ entry:
 ; CHECK: Callee:
 ; CHECK: push
 ; CHECK: mov r4, sp
-; CHECK: sub.w r12, r4, #1000
-; CHECK: mov sp, r12
+; CHECK: sub.w [[R12:r[0-9]+]], r4, #1000
+; CHECK: mov sp, [[R12]]
   %0 = icmp eq i32 %i, 0                          ; <i1> [#uses=1]
   br i1 %0, label %bb2, label %bb
 
diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
new file mode 100644
index 0000000..9e6d78e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }
+
+; CHECK: Perl_ck_sort
+; CHECK: ldr
+; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]]
+; CHECK: str {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
+
+define void @Perl_ck_sort() nounwind optsize {
+entry:
+  %tmp27 = load %struct.op** undef, align 4
+  switch i16 undef, label %if.end151 [
+    i16 178, label %if.then60
+    i16 177, label %if.then60
+  ]
+
+if.then60:                                        ; preds = %if.then40
+  br i1 undef, label %if.then67, label %if.end95
+
+if.then67:                                        ; preds = %if.then60
+  %op_next71 = getelementptr inbounds %struct.op* %tmp27, i32 0, i32 0
+  store %struct.op* %tmp27, %struct.op** %op_next71, align 4
+  %0 = getelementptr inbounds %struct.op* %tmp27, i32 1, i32 0
+  br label %if.end95
+
+if.end95:                                         ; preds = %if.else92, %if.then67
+  %.pre-phi = phi %struct.op** [ undef, %if.then60 ], [ %0, %if.then67 ]
+  %tmp98 = load %struct.op** %.pre-phi, align 4
+  br label %if.end151
+
+if.end151:                                        ; preds = %if.end100, %if.end, %entry
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 0e76770..3612e27 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -30,9 +30,8 @@ entry:
 define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
 entry:
 ; CHECK: f3
-; CHECK: lsrs  r2, r0, #7
-; CHECK: mov r0, r1
-; CHECK: bfi r0, r2, #7, #16
+; CHECK: lsrs {{.*}}, #7
+; CHECK: bfi {{.*}}, #7, #16
   %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
   %and2 = and i32 %B, -8388481                    ; <i32> [#uses=1]
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
@@ -42,8 +41,8 @@ entry:
 ; rdar://8752056
 define i32 @f4(i32 %a) nounwind {
 ; CHECK: f4
-; CHECK: movw r1, #3137
-; CHECK: bfi r1, r0, #15, #5
+; CHECK: movw [[R1:r[0-9]+]], #3137
+; CHECK: bfi [[R1]], {{.*}}, #15, #5
   %1 = shl i32 %a, 15
   %ins7 = and i32 %1, 1015808
   %ins12 = or i32 %ins7, 3137
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 10a4985..0992fa8 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -3,26 +3,29 @@
 
 declare double @floor(double) nounwind readnone
 
-define void @t(i1 %a, double %b) {
+define void @t(i32 %c, double %b) {
 entry:
-  br i1 %a, label %bb3, label %bb1
+  %cmp1 = icmp ne i32 %c, 0
+  br i1 %cmp1, label %bb3, label %bb1
 
 bb1:                                              ; preds = %entry
   unreachable
 
 bb3:                                              ; preds = %entry
-  br i1 %a, label %bb7, label %bb5
+  %cmp2 = icmp ne i32 %c, 0
+  br i1 %cmp2, label %bb7, label %bb5
 
 bb5:                                              ; preds = %bb3
   unreachable
 
 bb7:                                              ; preds = %bb3
-  br i1 %a, label %bb11, label %bb9
+  %cmp3 = icmp ne i32 %c, 0
+  br i1 %cmp3, label %bb11, label %bb9
 
 bb9:                                              ; preds = %bb7
-; CHECK:      cmp r0, #0
-; CHECK:      cmp r0, #0
-; CHECK-NEXT: cbnz
+; CHECK:      cmp	r0, #0
+; CHECK:      cmp	r0, #0
+; CHECK-NEXT:      cbnz
   %0 = tail call  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
 
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index eeaaa7fb..df221b9 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 63249f4..da12114 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 55c321d..15052e0 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 69f0383..566408a 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
 
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index 0f122f2..cdd3489 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index d905217..47f553f 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
 
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index db202dd..405b3bb 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 35b0159..6c5a4fb 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | grep setnp
-; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse | grep setnp
+; RUN: llc < %s -march=x86 -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | \
 ; RUN:   not grep setnp
 
 define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
deleted file mode 100644
index dee7415..0000000
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim -post-RA-scheduler=false -asm-verbose=0 | FileCheck %s
-; PR2536
-
-; CHECK: andl    $65534, %
-; CHECK-NEXT: movl %
-; CHECK-NEXT: movzwl
-
-@g_5 = external global i16		; <i16*> [#uses=2]
-@g_107 = external global i16		; <i16*> [#uses=1]
-@g_229 = external global i32		; <i32*> [#uses=1]
-@g_227 = external global i16		; <i16*> [#uses=1]
-
-define i32 @func_54(i32 %p_55, i16 zeroext  %p_56) nounwind  {
-entry:
-	load i16* @g_5, align 2		; <i16>:0 [#uses=1]
-	zext i16 %0 to i32		; <i32>:1 [#uses=1]
-	%.mask = and i32 %1, 65534		; <i32> [#uses=1]
-	icmp eq i32 %.mask, 0		; <i1>:2 [#uses=1]
-	load i32* @g_229, align 4		; <i32>:3 [#uses=1]
-	load i16* @g_227, align 2		; <i16>:4 [#uses=1]
-	icmp eq i16 %4, 0		; <i1>:5 [#uses=1]
-	load i16* @g_5, align 2		; <i16>:6 [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb7.preheader, %entry
-	%indvar4 = phi i32 [ 0, %entry ], [ %indvar.next5, %bb7.preheader ]		; <i32> [#uses=1]
-	%p_56_addr.1.reg2mem.0 = phi i16 [ %p_56, %entry ], [ %p_56_addr.0, %bb7.preheader ]		; <i16> [#uses=2]
-	br i1 %2, label %bb7.preheader, label %bb5
-
-bb5:		; preds = %bb
-	store i16 %6, i16* @g_107, align 2
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb5, %bb
-	icmp eq i16 %p_56_addr.1.reg2mem.0, 0		; <i1>:7 [#uses=1]
-	%.0 = select i1 %7, i32 1, i32 %3		; <i32> [#uses=1]
-	urem i32 1, %.0		; <i32>:8 [#uses=1]
-	icmp eq i32 %8, 0		; <i1>:9 [#uses=1]
-	%.not = xor i1 %9, true		; <i1> [#uses=1]
-	%.not1 = xor i1 %5, true		; <i1> [#uses=1]
-	%brmerge = or i1 %.not, %.not1		; <i1> [#uses=1]
-	%iftmp.6.0 = select i1 %brmerge, i32 3, i32 0		; <i32> [#uses=1]
-	mul i32 %iftmp.6.0, %3		; <i32>:10 [#uses=1]
-	icmp eq i32 %10, 0		; <i1>:11 [#uses=1]
-	%p_56_addr.0 = select i1 %11, i16 %p_56_addr.1.reg2mem.0, i16 1		; <i16> [#uses=1]
-	%indvar.next5 = add i32 %indvar4, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp eq i32 %indvar.next5, 17		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb25, label %bb
-
-bb25:		; preds = %bb7.preheader
-	ret i32 1
-}
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index b92c789..1d27fc5 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,5 +1,5 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ebp | count 9
 ; RUN: llc < %s | grep %ecx | count 5
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 00ab735..d423bfc 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,5 +1,5 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rbp | count 7
 ; RUN: llc < %s | grep %rcx | count 3
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 947a1f1..dfd165c 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,12 +1,32 @@
-; RUN: llc < %s -march=x86 -regalloc=linearscan | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
-; RUN: llc < %s -march=x86 -regalloc=fast       | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
-; RUN: llc < %s -march=x86 -regalloc=basic      | grep "#%ebp %esi %edx 8(%edi) %eax %bl"
-; RUN: llc < %s -march=x86 -regalloc=greedy     | grep "#%edx %edi %ebp 8(%esi) %eax %bl"
+; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast       | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=basic      | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=greedy     | FileCheck %s
 
-; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
+; The 1st, 2nd, 3rd and 5th registers must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
-; operand.  There are many combinations that work; this is what llc puts out now.
-; ModuleID = '<stdin>'
+; operand.
+;
+; CHECK: 1st=[[A1:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK: 2nd=[[A2:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT: [[A2]]
+; CHECK: 3rd=[[A3:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT: [[A2]]
+; CHECK-NOT: [[A3]]
+; CHECK: 5th=[[A5:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT; [[A5]]
+; CHECK: =4th
+
+; The 6th operand is an 8-bit register, and it mustn't alias the 1st and 5th.
+; CHECK: 1%e[[S1:.]]x
+; CHECK: 5%e[[S5:.]]x
+; CHECK-NOT: %[[S1]]
+; CHECK-NOT: %[[S5]]
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 	%struct.foo = type { i32, i32, i8* }
@@ -19,7 +39,7 @@ entry:
 	%3 = load i32* %0, align 4		; <i32> [#uses=1]
 	%4 = load i32* %1, align 4		; <i32> [#uses=1]
 	%5 = load i8* %state, align 1		; <i8> [#uses=1]
-	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#$0 $1 $2 $3 $4 $5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
+	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
 	%asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0		; <i32> [#uses=1]
 	%asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1		; <i32> [#uses=1]
 	store i32 %asmresult1, i32* %0
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 5eba9b9..75e0b8a 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -4,7 +4,7 @@
 
 ; CHECK:         ## InlineAsm End
 ; CHECK-NEXT: BB0_2:
-; CHECK-NEXT:    movl	%esi, %eax
+; CHECK-NEXT:    {{movl	%esi, %eax|addl	%edi, %esi}}
 
 
 @"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00"		; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index 2853930..45fc269 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -28,5 +28,5 @@ lpad:		; preds = %cont, %entry
 }
 
 ; CHECK: call{{.*}}f
-; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT: movl %eax, %esi
+; CHECK: movl %eax, %esi
+; CHECK: call{{.*}}g
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 848af82..2fceab6 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,8 +1,10 @@
-; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
-; RUN: llc -march=x86-64 -O2 -regalloc=basic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -O2 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -O2 -regalloc=basic < %s | FileCheck %s
 ; Test to check .debug_loc support. This test case emits many debug_loc entries.
 
 ; CHECK: Loc expr size
+; CHECK-NEXT: .short
+; CHECK-NEXT: .Ltmp
 ; CHECK-NEXT: DW_OP_reg
 
 %0 = type { double }
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 6600cc3..7909d27 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -68,9 +68,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: Ldebug_loc0:
 ; CHECK-NEXT: .quad   Lfunc_begin0
 ; CHECK-NEXT: .quad   [[LABEL]]
-; CHECK-NEXT: .short  1
+; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
+; CHECK-NEXT: .short  Lset{{.*}}
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   85
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .quad   [[LABEL]]
 ; CHECK-NEXT: .quad   [[CLOBBER]]
-; CHECK-NEXT: .short  1
+; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
+; CHECK-NEXT: .short  Lset{{.*}}
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   83
+; CHECK-NEXT: Ltmp{{.*}}:
+\ No newline at end of file
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index 6db3ce1..bb1db59 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -22,7 +22,7 @@ bb:
 ; it is.
 ;
 ; CHECK: # %bb
-; CHECK: addq $64036, %rdi
+; CHECK: leaq	64036(%rdx), %rdi
 ; CHECK: rep;stosl
 
   %tmp5 = bitcast i32* %tmp4 to i8*
diff --git a/test/CodeGen/Generic/2011-02-12-shuffle.ll b/test/CodeGen/X86/2011-02-12-shuffle.ll
index b4d56d1..b4d56d1 100644
--- a/test/CodeGen/Generic/2011-02-12-shuffle.ll
+++ b/test/CodeGen/X86/2011-02-12-shuffle.ll
diff --git a/test/CodeGen/X86/2011-05-09-loaduse.ll b/test/CodeGen/X86/2011-05-09-loaduse.ll
new file mode 100644
index 0000000..8673d74
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+
+;CHECK: test
+;CHECK-not: pshufd
+;CHECK: ret
+define float @test(<4 x float>* %A) nounwind {
+entry:
+  %T = load <4 x float>* %A
+  %R = extractelement <4 x float> %T, i32 3
+  store <4 x float><float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>* %A
+  ret float %R
+}
+
diff --git a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
new file mode 100644
index 0000000..0f18f09
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.0"
+
+%struct.attrib = type { i32, i32 }
+%struct.dfa = type { [80 x i8], i32, %struct.state*, i32, i32, %struct.attrib*, i32, i32 }
+%struct.state = type { i32, [4 x i32] }
+
+@aux_temp = external global %struct.dfa, align 8
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+
+declare void @__memset_chk() nounwind
+
+define void @dfa_add_string() nounwind uwtable ssp {
+entry:
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %entry
+  %idxprom.i = add i64 0, 1
+  br i1 undef, label %land.end.thread.i, label %land.end.i
+
+land.end.thread.i:                                ; preds = %if.end.i
+  %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %cmp1710.i = icmp eq i64 %0, -1
+  br i1 %cmp1710.i, label %cond.false156.i, label %cond.true138.i
+
+land.end.i:                                       ; preds = %if.end.i
+  %1 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %cmp17.i = icmp eq i64 %1, -1
+  br i1 %cmp17.i, label %cond.false156.i, label %cond.true138.i
+
+cond.true138.i:                                   ; preds = %for.end.i, %land.end.thread.i
+  call void @__memset_chk() nounwind
+  br label %cond.end166.i
+
+cond.false156.i:                                  ; preds = %for.end.i, %land.end.thread.i
+  %idxprom1114.i = phi i64 [ undef, %land.end.thread.i ], [ %idxprom.i, %land.end.i ]
+  call void @__memset_chk() nounwind
+  br label %cond.end166.i
+
+cond.end166.i:                                    ; preds = %cond.false156.i, %cond.true138.i
+  %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ]
+  %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8, !tbaa !0
+  %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
+  store i32 0, i32* %att.i, align 4, !tbaa !3
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
new file mode 100644
index 0000000..c595bba
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.0"
+
+@bit_count = external constant [256 x i32], align 16
+
+define fastcc void @unate_intersect() nounwind uwtable ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc.i
+  br label %do.body.i
+
+do.body.i:                                        ; preds = %do.body.i, %for.body
+  %exitcond149 = icmp eq i64 undef, undef
+  br i1 %exitcond149, label %land.lhs.true, label %do.body.i
+
+land.lhs.true:                                    ; preds = %do.body.i
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.inc.i, %if.then
+  %tmp3524.i = phi i32 [ 0, %land.lhs.true ], [ %tmp351.i, %for.inc.i ]
+  %tmp6.i12 = load i32* undef, align 4
+  br i1 undef, label %for.inc.i, label %if.then.i17
+
+if.then.i17:                                      ; preds = %for.body.i
+  %shr.i14 = lshr i32 %tmp6.i12, 8
+  %and14.i = and i32 %shr.i14, 255
+  %idxprom15.i = zext i32 %and14.i to i64
+  %arrayidx16.i = getelementptr inbounds [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i
+  %tmp17.i15 = load i32* %arrayidx16.i, align 4
+  %add.i = add i32 0, %tmp3524.i
+  %add24.i = add i32 %add.i, %tmp17.i15
+  %add31.i = add i32 %add24.i, 0
+  %add33.i = add i32 %add31.i, 0
+  br label %for.inc.i
+
+for.inc.i:                                        ; preds = %if.then.i17, %for.body.i
+  %tmp351.i = phi i32 [ %add33.i, %if.then.i17 ], [ %tmp3524.i, %for.body.i ]
+  br label %for.body.i
+}
diff --git a/test/CodeGen/X86/2011-05-31-movmsk.ll b/test/CodeGen/X86/2011-05-31-movmsk.ll
new file mode 100644
index 0000000..2b54d5c
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-31-movmsk.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mcpu=core2 < %s | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.6"
+
+%0 = type { double }
+%union.anon = type { float }
+
+define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca double, align 8
+  %__u.i = alloca %0, align 8
+  %0 = bitcast double* %__x.addr.i to i8*
+  %1 = bitcast %0* %__u.i to i8*
+  store double %d1, double* %__x.addr.i, align 8
+  %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+  store double %d1, double* %__f.i, align 8
+  %tmp = bitcast double %d1 to i64
+; CHECK-NOT: shr
+; CHECK: movmskpd
+; CHECK-NEXT: and
+  %tmp1 = lshr i64 %tmp, 63
+  %shr.i = trunc i64 %tmp1 to i32
+  ret i32 %shr.i
+}
+
+define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca double, align 8
+  %__u.i = alloca %0, align 8
+  %add = fadd double %d1, %d2
+  %0 = bitcast double* %__x.addr.i to i8*
+  %1 = bitcast %0* %__u.i to i8*
+  store double %add, double* %__x.addr.i, align 8
+  %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+  store double %add, double* %__f.i, align 8
+  %tmp = bitcast double %add to i64
+; CHECK-NOT: shr
+; CHECK: movmskpd
+; CHECK-NEXT: and
+  %tmp1 = lshr i64 %tmp, 63
+  %shr.i = trunc i64 %tmp1 to i32
+  ret i32 %shr.i
+}
+
+define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca float, align 4
+  %__u.i = alloca %union.anon, align 4
+  %0 = bitcast float* %__x.addr.i to i8*
+  %1 = bitcast %union.anon* %__u.i to i8*
+  store float %f1, float* %__x.addr.i, align 4
+  %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+  store float %f1, float* %__f.i, align 4
+  %2 = bitcast float %f1 to i32
+; CHECK-NOT: shr
+; CHECK: movmskps
+; CHECK-NEXT: and
+  %shr.i = lshr i32 %2, 31
+  ret i32 %shr.i
+}
+
+define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca float, align 4
+  %__u.i = alloca %union.anon, align 4
+  %add = fadd float %f1, %f2
+  %0 = bitcast float* %__x.addr.i to i8*
+  %1 = bitcast %union.anon* %__u.i to i8*
+  store float %add, float* %__x.addr.i, align 4
+  %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+  store float %add, float* %__f.i, align 4
+  %2 = bitcast float %add to i32
+; CHECK-NOT: shr
+; CHECK: movmskps
+; CHECK-NEXT: and
+  %shr.i = lshr i32 %2, 31
+  ret i32 %shr.i
+}
diff --git a/test/CodeGen/X86/2011-06-01-fildll.ll b/test/CodeGen/X86/2011-06-01-fildll.ll
new file mode 100644
index 0000000..3a0b05f
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-01-fildll.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-macosx10.6.6"
+
+define float @f(i64* nocapture %x) nounwind readonly ssp {
+entry:
+; CHECK: movl
+; CHECK-NOT: movl
+  %tmp1 = load i64* %x, align 4
+; CHECK: fildll
+  %conv = sitofp i64 %tmp1 to float
+  %add = fadd float %conv, 1.000000e+00
+  ret float %add
+}
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
new file mode 100644
index 0000000..bf7f583
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+
+define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
+entry:
+  %tmp1 = load i64* %a, align 8
+; Insure x87 ops are properly chained, order preserved.
+; CHECK: fildll
+  %conv = sitofp i64 %tmp1 to float
+; CHECK: fstps
+  store float %conv, float* %f, align 4
+; CHECK: idivl
+  %div = sdiv i32 %x, %y
+  %conv5 = sext i32 %div to i64
+  store i64 %conv5, i64* %b, align 8
+  ret float %conv
+}
+
+define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
+entry:
+; CHECK: movl $0,
+  store i64 0, i64* %b, align 8
+  %mul = mul nsw i32 %y, %x
+  %sub = add nsw i32 %mul, -1
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i64* %a, i64 %idxprom
+  %tmp4 = load i64* %arrayidx, align 8
+; CHECK: fildll
+  %conv = sitofp i64 %tmp4 to float
+  store float %conv, float* %f, align 4
+  ret float %conv
+}
diff --git a/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll b/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll
new file mode 100644
index 0000000..d934148
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=x86-64 < %s
+define i32 @signbitl(x86_fp80 %x) nounwind uwtable readnone {
+entry:
+  %tmp4 = bitcast x86_fp80 %x to i80
+  %tmp4.lobit = lshr i80 %tmp4, 79
+  %tmp = trunc i80 %tmp4.lobit to i32
+  ret i32 %tmp
+}
diff --git a/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
new file mode 100644
index 0000000..08178a3
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @Quicksort(i32* %a, i32 %l, i32 %r) nounwind ssp {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %do.cond, %entry
+  %l.tr = phi i32 [ %l, %entry ], [ %i.1, %do.cond ]
+  %r.tr = phi i32 [ %r, %entry ], [ %l.tr, %do.cond ]
+  %idxprom12 = sext i32 %r.tr to i64
+  %arrayidx14 = getelementptr inbounds i32* %a, i64 %idxprom12
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %tailrecurse
+  %i.0 = phi i32 [ %l.tr, %tailrecurse ], [ %i.1, %do.cond ]
+  %add7 = add nsw i32 %i.0, 1
+  %cmp = icmp sgt i32 %add7, %r.tr
+  br i1 %cmp, label %do.cond, label %if.then
+
+if.then:                                          ; preds = %do.body
+  store i32 %add7, i32* %arrayidx14, align 4
+  %add16 = add i32 %i.0, 2
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body, %if.then
+  %i.1 = phi i32 [ %add16, %if.then ], [ %add7, %do.body ]
+  %cmp19 = icmp sgt i32 %i.1, %r.tr
+  br i1 %cmp19, label %tailrecurse, label %do.body
+}
diff --git a/test/CodeGen/X86/9601.ll b/test/CodeGen/X86/9601.ll
new file mode 100644
index 0000000..cd65a03
--- /dev/null
+++ b/test/CodeGen/X86/9601.ll
@@ -0,0 +1,12 @@
+; RUN:  llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR9601
+; Previously we'd crash trying to put a 32-bit float into a constraint
+; for a normal 'r' register.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test() {
+entry:
+  %0 = call float asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* undef, float 2.000000e+00) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 7535e07..5068d29 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -12,17 +12,6 @@
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
-
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
 @xsrc = external global [32 x i32]
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index f924ec8..a4abccb 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,9 +4,9 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK: test1:
-; CHECK:	sbbl	%ecx, %ecx
+; CHECK: cmpl %ecx, %eax
 ; CHECK-NOT: addl
-; CHECK: subl	%ecx, %eax
+; CHECK: adcl $0, %eax
   %add4 = add i32 %x, %sum
   %cmp = icmp ult i32 %add4, %x
   %inc = zext i1 %cmp to i32
@@ -18,8 +18,7 @@ entry:
 ; CHECK: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
-; CHECK-NEXT: sbbl
-; CHECK-NEXT: subl
+; CHECK-NEXT: adcl $0
 ; CHECK-NEXT: ret
 define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index b95e5b5..7bf527a 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+
+; Some of these tests depend on -join-physregs to commute instructions.
 
 ; The immediate can be encoded in a smaller way if the
 ; instruction is a sub instead of an add.
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index 640237d..a3dc85f 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding -join-physregs | FileCheck %s
 
 ; PR8365
 ; CHECK: andl	$-64, %edi              # encoding: [0x83,0xe7,0xc0]
diff --git a/test/CodeGen/X86/basic-promote-integers.ll b/test/CodeGen/X86/basic-promote-integers.ll
new file mode 100644
index 0000000..c80f2b0
--- /dev/null
+++ b/test/CodeGen/X86/basic-promote-integers.ll
@@ -0,0 +1,98 @@
+; Test that vectors are scalarized/lowered correctly
+; (with both legalization methods).
+; RUN: llc -march=x86 -promote-elements < %s
+; RUN: llc -march=x86                   < %s
+
+; A simple test to check copyToParts and copyFromParts.
+
+define <4 x i64> @test_param_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <4 x i64> %A
+}
+
+define <2 x i32> @test_param_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <2 x i32> %B
+}
+
+define <4 x i8> @test_param_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <4 x i8> %C
+}
+
+; Simple tests to check arithmetic and vector operations on types which need to
+; be legalized (no loads/stores to/from memory here).
+
+define <4 x i64> @test_arith_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i64> %A, <i64 0, i64 1, i64 3, i64 9>
+   ret <4 x i64> %K
+}
+
+define <2 x i32> @test_arith_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <2 x i32> %B, <i32 0, i32 1>
+   ret <2 x i32> %K
+}
+
+define <4 x i8> @test_arith_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9>
+   ret <4 x i8> %K
+}
+
+define i8 @test_arith_3(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9>
+   %Y = extractelement <4 x i8> %K, i32 1
+   ret i8 %Y
+}
+
+define <4 x i8> @test_arith_4(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %Y = insertelement <4 x i8> %C, i8 1, i32 0
+   ret <4 x i8> %Y
+}
+
+define <4 x i32> @test_arith_5(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %Y = insertelement <4 x i32> %C, i32 1, i32 0
+   ret <4 x i32> %Y
+}
+
+define <4 x i32> @test_arith_6(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %Y = insertelement <4 x i32> %C, i32 %F, i32 0
+   ret <4 x i32> %Y
+}
+
+define <4 x i64> @test_arith_7(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %W = zext i32 %F to i64
+   %Y = insertelement <4 x i64> %A, i64 %W, i32 0
+   ret <4 x i64> %Y
+}
+
+define i64 @test_arith_8(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %W = zext i32 %F to i64
+   %T = add i64 %W , 11
+   ret i64 %T
+}
+
+define <4 x i64> @test_arith_9(<4 x i64> %A, <2 x i32> %B, <4 x i16> %C)  {
+   %T = add <4 x i16> %C, %C
+   %F0 = extractelement <4 x i16> %T, i32 0
+   %F1 = extractelement <4 x i16> %T, i32 1
+   %W0 = zext i16 %F0 to i64
+   %W1 = zext i16 %F1 to i64
+   %Y0 = insertelement <4 x i64> %A,  i64 %W0, i32 0
+   %Y1 = insertelement <4 x i64> %Y0, i64 %W1, i32 2
+   ret <4 x i64> %Y1
+}
+
+define <4 x i16> @test_arith_10(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = bitcast <2 x i32> %B to <4 x i16>
+   %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3>
+   ret <4 x i16> %T
+}
+
+
+; Simple tests to check saving/loading from memory
+define <4 x i16> @test_mem_0(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = bitcast <2 x i32> %B to <4 x i16>
+   %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3>
+   ret <4 x i16> %T
+}
+
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll
index d2c30c6..3558376 100644
--- a/test/CodeGen/X86/bool-zext.ll
+++ b/test/CodeGen/X86/bool-zext.ll
@@ -1,8 +1,12 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
 
-; CHECK: @bar1
-; CHECK: movzbl
-; CHECK: callq
+; X64: @bar1
+; X64: movzbl
+; X64: jmp
+; WIN64: @bar1
+; WIN64: movzbl
+; WIN64: callq
 define void @bar1(i1 zeroext %v1) nounwind ssp {
 entry:
   %conv = zext i1 %v1 to i32
@@ -10,9 +14,12 @@ entry:
   ret void
 }
 
-; CHECK: @bar2
-; CHECK-NOT: movzbl
-; CHECK: callq
+; X64: @bar2
+; X64-NOT: movzbl
+; X64: jmp
+; WIN64: @bar2
+; WIN64-NOT: movzbl
+; WIN64: callq
 define void @bar2(i8 zeroext %v1) nounwind ssp {
 entry:
   %conv = zext i8 %v1 to i32
@@ -20,11 +27,16 @@ entry:
   ret void
 }
 
-; CHECK: @bar3
-; CHECK: callq
-; CHECK-NOT: movzbl
-; CHECK-NOT: and
-; CHECK: ret
+; X64: @bar3
+; X64: callq
+; X64-NOT: movzbl
+; X64-NOT: and
+; X64: ret
+; WIN64: @bar3
+; WIN64: callq
+; WIN64-NOT: movzbl
+; WIN64-NOT: and
+; WIN64: ret
 define zeroext i1 @bar3() nounwind ssp {
 entry:
   %call = call i1 @foo2() nounwind
diff --git a/test/CodeGen/X86/byval-align.ll b/test/CodeGen/X86/byval-align.ll
new file mode 100644
index 0000000..c62a181
--- /dev/null
+++ b/test/CodeGen/X86/byval-align.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+%struct.S = type { i32}
+
+@.str = private constant [10 x i8] c"ptr = %p\0A\00", align 1 ; <[10 x i8]*> [#uses=1]
+@.str1 = private constant [8 x i8] c"Failed \00", align 1 ; <[8 x i8]*> [#uses=1]
+@.str2 = private constant [2 x i8] c"0\00", align 1 ; <[2 x i8]*> [#uses=1]
+@.str3 = private constant [7 x i8] c"test.c\00", align 1 ; <[7 x i8]*> [#uses=1]
+@__PRETTY_FUNCTION__.2067 = internal constant [13 x i8] c"aligned_func\00" ; <[13 x i8]*> [#uses=1]
+
+define void @aligned_func(%struct.S* byval align 64 %obj) nounwind {
+entry:
+  %ptr = alloca i8*                               ; <i8**> [#uses=3]
+  %p = alloca i64                                 ; <i64*> [#uses=3]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %obj1 = bitcast %struct.S* %obj to i8*          ; <i8*> [#uses=1]
+  store i8* %obj1, i8** %ptr, align 8
+  %0 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %1 = ptrtoint i8* %0 to i64                     ; <i64> [#uses=1]
+  store i64 %1, i64* %p, align 8
+  %2 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
+  %4 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %5 = and i64 %4, 140737488355264                ; <i64> [#uses=1]
+  %6 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %7 = icmp ne i64 %5, %6                         ; <i1> [#uses=1]
+  br i1 %7, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  call void @__assert_fail(i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8]* @.str3, i64 0, i64 0), i32 18, i8* getelementptr inbounds ([13 x i8]* @__PRETTY_FUNCTION__.2067, i64 0, i64 0)) noreturn nounwind
+  unreachable
+
+bb2:                                              ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %bb2
+  ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
+
+declare i32 @puts(i8*)
+
+declare void @__assert_fail(i8*, i8*, i32, i8*) noreturn nounwind
+
+define void @main() nounwind {
+entry:
+; CHECK: main
+; CHECK: andq    $-64, %rsp
+  %s1 = alloca %struct.S                          ; <%struct.S*> [#uses=4]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <i32*> [#uses=1]
+  store i32 1, i32* %0, align 4
+  call void @aligned_func(%struct.S* byval align 64 %s1) nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 686ed9c..98a26e4 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -9,7 +9,6 @@ entry:
 ; CHECK: main:
 ; CHECK: movl $1, (%esp)
 ; CHECK: leal 16(%esp), %edi
-; CHECK: movl $36, %ecx
 ; CHECK: leal 160(%esp), %esi
 ; CHECK: rep;movsl
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 623ac75..d76fab4 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -31,3 +31,18 @@ entry:
 }
 
 declare i16 @llvm.ctlz.i16(i16) nounwind readnone 
+
+; Don't generate the cmovne when the source is known non-zero (and bsr would
+; not set ZF).
+; rdar://9490949
+
+define i32 @t4(i32 %n) nounwind {
+entry:
+; CHECK: t4:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: ret
+  %or = or i32 %n, 1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+  ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 7306920..6e5c1cf 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s
 ; CHECK-NOT:     mov
 ; CHECK:     paddw
 ; CHECK-NOT:     mov
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
new file mode 100644
index 0000000..bfc96f1
--- /dev/null
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+; Radar 9511391
+
+;CHECK:         .byte   4                       ## DW_AT_const_value
+define i32 @foo() nounwind uwtable readnone optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  ret i32 42, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590080, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 2, i32 12, metadata !7, null}
+!10 = metadata !{i32 3, i32 2, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
new file mode 100644
index 0000000..5a51eb8
--- /dev/null
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s - | FileCheck %s
+target triple = "x86_64-apple-darwin10.0.0"
+
+;CHECK:        ## DW_OP_constu
+;CHECK-NEXT:  .byte	42
+define i32 @foobar() nounwind readonly noinline ssp {
+entry:
+  %call = tail call i32 @bar(), !dbg !11
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  %call2 = tail call i32 @bar(), !dbg !11
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11
+  %add = add nsw i32 %call2, %call, !dbg !12
+  ret i32 %add, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare i32 @bar() nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foobar = !{!6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar}
+!1 = metadata !{i32 524329, metadata !"mu.c", metadata !"/private/tmp", metadata !2}
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0}
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 524544, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5}
+!7 = metadata !{i32 524299, metadata !0, i32 12, i32 52, metadata !1, i32 0}
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 15, i32 12, metadata !7, null}
+!10 = metadata !{i32 23, i32 3, metadata !7, null}
+!11 = metadata !{i32 17, i32 3, metadata !7, null}
+!12 = metadata !{i32 18, i32 3, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index e7d5f92..3a849aa 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
 
 ; Radar 8884898
-; CHECK: file	1 "/Users/manav/one/two/simple.c"
+; CHECK: file	1 "/Users/manav/one/two{{/|\\\\}}simple.c"
 
 declare i32 @printf(i8*, ...) nounwind
 
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index 76b93dd..afe1729 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -6,8 +6,11 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK: Ldebug_loc0:
 ;CHECK-NEXT:	.quad	Lfunc_begin0
 ;CHECK-NEXT:	.quad	L
-;CHECK-NEXT:	.short	1                       ## Loc expr size
+;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}          ## Loc expr size
+;CHECK-NEXT:    .short  Lset
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
+;CHECK-NEXT: Ltmp7
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
new file mode 100644
index 0000000..81303bb
--- /dev/null
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O0 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+
+;CHECK: .loc	1 2 11 prologue_end
+define i32 @foo(i32 %i) nounwind ssp {
+entry:
+  %i.addr = alloca i32, align 4
+  %j = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8
+  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11
+  store i32 2, i32* %j, align 4, !dbg !12
+  %tmp = load i32* %j, align 4, !dbg !13
+  %inc = add nsw i32 %tmp, 1, !dbg !13
+  store i32 %inc, i32* %j, align 4, !dbg !13
+  %tmp1 = load i32* %j, align 4, !dbg !14
+  %tmp2 = load i32* %i.addr, align 4, !dbg !14
+  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
+  store i32 %add, i32* %j, align 4, !dbg !14
+  %tmp3 = load i32* %j, align 4, !dbg !15
+  ret i32 %tmp3, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @foo(i32 21), !dbg !16
+  ret i32 %call, !dbg !16
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1, !6}
+
+!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 13, metadata !1, null}
+!9 = metadata !{i32 590080, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 589835, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 2, i32 6, metadata !10, null}
+!12 = metadata !{i32 2, i32 11, metadata !10, null}
+!13 = metadata !{i32 3, i32 2, metadata !10, null}
+!14 = metadata !{i32 4, i32 2, metadata !10, null}
+!15 = metadata !{i32 5, i32 2, metadata !10, null}
+!16 = metadata !{i32 8, i32 2, metadata !17, null}
+!17 = metadata !{i32 589835, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
new file mode 100644
index 0000000..b115bf4
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9817
+
+
+declare  <4 x i32> @__amdil_get_global_id_int()
+declare  void @llvm.dbg.value(metadata , i64 , metadata )
+define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind {
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata
+!7), !dbg !8
+  %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0
+  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
+  %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
+  %tmp3 = add i32 0, %tmp2, !dbg !15
+; CHECK:  ##DEBUG_VALUE: idx <- EAX+0
+  call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
+!15
+  %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
+  ret void, !dbg !17
+}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata
+!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata
+!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 42, metadata !0, null}
+!9 = metadata !{i32 590080, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 3, i32 41, metadata !10, null}
+!12 = metadata !{i32 0}
+!13 = metadata !{i32 590080, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 4, i32 20, metadata !10, null}
+!15 = metadata !{i32 5, i32 15, metadata !10, null}
+!16 = metadata !{i32 6, i32 18, metadata !10, null}
+!17 = metadata !{i32 7, i32 1, metadata !0, null}
+
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
new file mode 100644
index 0000000..d1a9e57
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9879
+
+; CHECK: ##DEBUG_VALUE: tid <-
+%0 = type { i8*, i8*, i8*, i8*, i32 }
+
+@sgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@fgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@lvgv = internal constant [0 x i8*] zeroinitializer
+@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* bitcast ([1 x i8] addrspace(2)* @sgv to i8*), i8* bitcast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata"
+
+define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind {
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9
+  %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0
+  br label %2
+
+; <label>:2                                       ; preds = %entry
+  %3 = phi i32 [ %1, %entry ]
+  br label %4
+
+; <label>:4                                       ; preds = %2
+  %5 = phi i32 [ %3, %2 ]
+  br label %get_local_id.exit
+
+get_local_id.exit:                                ; preds = %4
+  %6 = phi i32 [ %5, %4 ]
+  call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12
+  %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind
+  %8 = extractelement <4 x i32> %7, i32 0
+  br label %9
+
+; <label>:9                                       ; preds = %get_local_id.exit
+  %10 = phi i32 [ %8, %get_local_id.exit ]
+  br label %11
+
+; <label>:11                                      ; preds = %9
+  %12 = phi i32 [ %10, %9 ]
+  br label %get_global_id.exit
+
+get_global_id.exit:                               ; preds = %11
+  %13 = phi i32 [ %12, %11 ]
+  call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14
+  %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind
+  %15 = extractelement <4 x i32> %14, i32 0
+  br label %16
+
+; <label>:16                                      ; preds = %get_global_id.exit
+  %17 = phi i32 [ %15, %get_global_id.exit ]
+  br label %18
+
+; <label>:18                                      ; preds = %16
+  %19 = phi i32 [ %17, %16 ]
+  br label %get_local_size.exit
+
+get_local_size.exit:                              ; preds = %18
+  %20 = phi i32 [ %19, %18 ]
+  call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16
+  %tmp5 = add i32 %6, %13, !dbg !17
+  %tmp7 = add i32 %tmp5, %20, !dbg !17
+  store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17
+  br label %return, !dbg !17
+
+return:                                           ; preds = %get_local_size.exit
+  ret void, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare <4 x i32> @__amdil_get_local_size_int() nounwind
+
+declare <4 x i32> @__amdil_get_local_id_int() nounwind
+
+declare <4 x i32> @__amdil_get_global_id_int() nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 1, i32 32, metadata !0, null}
+!10 = metadata !{i32 590080, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 5, i32 24, metadata !11, null}
+!13 = metadata !{i32 590080, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 6, i32 25, metadata !11, null}
+!15 = metadata !{i32 590080, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 7, i32 26, metadata !11, null}
+!17 = metadata !{i32 9, i32 24, metadata !11, null}
+!18 = metadata !{i32 10, i32 1, metadata !0, null}
+
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index da49f2d..28d873b 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic -join-physregs < %s | FileCheck %s
 
 %struct.a = type { i32 }
 
@@ -53,7 +53,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ;CHECK:Ldebug_loc0:
 ;CHECK-NEXT:	.quad
 ;CHECK-NEXT:	.quad	[[CLOBBER]]
-;CHECK-NEXT:	.short	1
+;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}
+;CHECK-NEXT:    .short  Lset
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/div8.ll b/test/CodeGen/X86/div8.ll
new file mode 100644
index 0000000..0825f79
--- /dev/null
+++ b/test/CodeGen/X86/div8.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '8div.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.6"
+
+define signext i8 @test_div(i8 %dividend, i8 %divisor) nounwind ssp {
+entry:
+  %dividend.addr = alloca i8, align 2
+  %divisor.addr = alloca i8, align 1
+  %quotient = alloca i8, align 1
+  store i8 %dividend, i8* %dividend.addr, align 2
+  store i8 %divisor, i8* %divisor.addr, align 1
+  %tmp = load i8* %dividend.addr, align 2
+  %tmp1 = load i8* %divisor.addr, align 1
+; Insist on i8->i32 zero extension, even though divb demands only i16:
+; CHECK: movzbl {{.*}}%eax
+; CHECK: divb
+  %div = udiv i8 %tmp, %tmp1
+  store i8 %div, i8* %quotient, align 1
+  %tmp4 = load i8* %quotient, align 1
+  ret i8 %tmp4
+}
diff --git a/test/CodeGen/X86/eh_frame.ll b/test/CodeGen/X86/eh_frame.ll
new file mode 100644
index 0000000..3b792b2
--- /dev/null
+++ b/test/CodeGen/X86/eh_frame.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu | FileCheck -check-prefix=STATIC %s
+; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=PIC %s
+
+@__FRAME_END__ = constant [1 x i32] zeroinitializer, section ".eh_frame"
+
+@foo = external global i32
+@bar1 = constant i8* bitcast (i32* @foo to i8*), section "my_bar1", align 8
+
+
+; STATIC: .section	.eh_frame,"a",@progbits
+; STATIC: .section	my_bar1,"a",@progbits
+
+; PIC:	.section	.eh_frame,"a",@progbits
+; PIC:	.section	my_bar1,"aw",@progbits
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index b303cd1..874c53a 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,10 +6,24 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NOT: movq %rsp, %rbp
+; CHECK-NO-FP-NEXT: :
+; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
+; CHECK-NO-FP-NEXT: :
+; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP:      movq %rsp, %rbp
-; CHECK-FP-NEXT: Ltmp1:
-; CHECK-FP:      nop
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_startproc
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: pushq %rbp
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_offset %rbp, -16
+; CHECK-FP-NEXT: movq %rsp, %rbp
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FP-NEXT: nop
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/fast-isel-agg-constant.ll b/test/CodeGen/X86/fast-isel-agg-constant.ll
new file mode 100644
index 0000000..ce0dff7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-agg-constant.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s
+; Make sure fast-isel doesn't screw up aggregate constants.
+; (Failing out is okay, as long as we don't miscompile.)
+
+%bar = type { i32 }
+
+define i32 @foo()  {
+  %tmp = extractvalue %bar { i32 3 }, 0
+  ret i32 %tmp
+; CHECK: movl $3, %eax
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 5fcdbbb..2fbe4e2 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -fast-isel -march=x86 | grep and
+; RUN: llc < %s -O0 -fast-isel-abort -march=x86 | FileCheck %s
 
-define i32 @t() nounwind {
+%struct.s = type {i32, i32, i32}
+
+define i32 @test1() nounwind {
 tak:
 	%tmp = call i1 @foo()
 	br i1 %tmp, label %BB1, label %BB2
@@ -8,6 +10,46 @@ BB1:
 	ret i32 1
 BB2:
 	ret i32 0
+; CHECK: test1:
+; CHECK: calll
+; CHECK-NEXT: testb	$1
 }
-
 declare i1 @foo() zeroext nounwind
+
+declare void @foo2(%struct.s* byval)
+
+define void @test2(%struct.s* %d) nounwind {
+  call void @foo2(%struct.s* %d byval)
+  ret void
+; CHECK: test2:
+; CHECK: movl	(%eax)
+; CHECK: movl {{.*}}, (%esp)
+; CHECK: movl	4(%eax)
+; CHECK: movl {{.*}}, 4(%esp)
+; CHECK: movl	8(%eax)
+; CHECK: movl {{.*}}, 8(%esp)
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @test3(i8* %a) {
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test3:
+; CHECK:   movl	{{.*}}, (%esp)
+; CHECK:   movl	$0, 4(%esp)
+; CHECK:   movl	$100, 8(%esp)
+; CHECK:   calll {{.*}}memset
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @test4(i8* %a, i8* %b) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test4:
+; CHECK:   movl	{{.*}}, (%esp)
+; CHECK:   movl	{{.*}}, 4(%esp)
+; CHECK:   movl	$100, 8(%esp)
+; CHECK:   calll {{.*}}memcpy
+}
diff --git a/test/CodeGen/X86/fast-isel-extract.ll b/test/CodeGen/X86/fast-isel-extract.ll
new file mode 100644
index 0000000..f63396e
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-extract.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort | FileCheck %s
+
+%struct.x = type { i64, i64 }
+%addovf = type { i32, i1 }
+declare %struct.x @f()
+
+define void @test1(i64*) nounwind ssp {
+  %2 = tail call %struct.x @f() nounwind
+  %3 = extractvalue %struct.x %2, 0
+  %4 = add i64 %3, 10
+  store i64 %4, i64* %0
+  ret void
+; CHECK: test1:
+; CHECK: callq _f
+; CHECK-NEXT: addq	$10, %rax
+}
+
+define void @test2(i64*) nounwind ssp {
+  %2 = tail call %struct.x @f() nounwind
+  %3 = extractvalue %struct.x %2, 1
+  %4 = add i64 %3, 10
+  store i64 %4, i64* %0
+  ret void
+; CHECK: test2:
+; CHECK: callq _f
+; CHECK-NEXT: addq	$10, %rdx
+}
+
+declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+
+define void @test3(i32 %x, i32 %y, i32* %z) {
+  %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %sum = extractvalue %addovf %r, 0
+  %sum3 = mul i32 %sum, 3
+  %bit = extractvalue %addovf %r, 1
+  br i1 %bit, label %then, label %end
+  
+then:
+  store i32 %sum3, i32* %z
+  br label %end
+
+end:
+  ret void
+; CHECK: test3
+; CHECK: addl
+; CHECK: seto %al
+; CHECK: testb $1, %al
+}
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index 5ffd48b..f42a4a2 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
 ; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
 
 ; CHECK: doo:
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index 5d572c1..bea18a1 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,14 +1,15 @@
-; RUN: llc < %s -march=x86 -fast-isel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
 
-declare i64 @test1a(i64)
+declare i32 @test1a(i32)
 
-define i32 @test1(i64 %x) nounwind {
+define i32 @test1(i32 %x) nounwind {
 ; CHECK: test1:
 ; CHECK: andb $1, %
-	%y = add i64 %x, -3
-	%t = call i64 @test1a(i64 %y)
-	%s = mul i64 %t, 77
-	%z = trunc i64 %s to i1
+	%y = add i32 %x, -3
+	%t = call i32 @test1a(i32 %y)
+	%s = mul i32 %t, 77
+	%z = trunc i32 %s to i1
 	br label %next
 
 next:		; preds = %0
diff --git a/test/CodeGen/X86/fast-isel-ret-ext.ll b/test/CodeGen/X86/fast-isel-ret-ext.ll
new file mode 100644
index 0000000..fd768cb
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-ret-ext.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s  -O0 -fast-isel-abort -mtriple i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s  -O0 -fast-isel-abort -mtriple x86_64-apple-darwin10 | FileCheck %s
+
+define zeroext i8 @test1(i32 %y) nounwind {
+  %conv = trunc i32 %y to i8
+  ret i8 %conv
+  ; CHECK: test1:
+  ; CHECK: movzbl {{.*}}, %eax
+}
+
+define signext i8 @test2(i32 %y) nounwind {
+  %conv = trunc i32 %y to i8
+  ret i8 %conv
+  ; CHECK: test2:
+  ; CHECK: movsbl {{.*}}, %eax
+}
+
+define zeroext i16 @test3(i32 %y) nounwind {
+  %conv = trunc i32 %y to i16
+  ret i16 %conv
+  ; CHECK: test3:
+  ; CHECK: movzwl {{.*}}, %eax
+}
+
+define signext i16 @test4(i32 %y) nounwind {
+  %conv = trunc i32 %y to i16
+  ret i16 %conv
+  ; CHECK: test4:
+  ; CHECK: movswl {{.*}}, %eax
+}
+
+define zeroext i1 @test5(i32 %y) nounwind {
+  %conv = trunc i32 %y to i1
+  ret i1 %conv
+  ; CHECK: test5:
+  ; CHECK: andb $1
+  ; CHECK: movzbl {{.*}}, %eax
+}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 5a1d213..8391860 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10
 
 ; This tests very minimal fast-isel functionality.
 
@@ -27,7 +27,7 @@ exit:
   ret i32* %t8
 }
 
-define double @bar(double* %p, double* %q) nounwind {
+define void @bar(double* %p, double* %q) nounwind {
 entry:
   %r = load double* %p
   %s = load double* %q
@@ -41,7 +41,8 @@ fast:
   br label %exit
 
 exit:
-  ret double %t3
+  store double %t3, double* %q
+  ret void
 }
 
 define i32 @cast() nounwind {
@@ -68,24 +69,28 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
   ret i8* %t
 }
 
-define i8 @trunc_i32_i8(i32 %x) signext nounwind  {
+define void @trunc_i32_i8(i32 %x, i8* %p) nounwind  {
 	%tmp1 = trunc i32 %x to i8
-	ret i8 %tmp1
+	store i8 %tmp1, i8* %p
+	ret void
 }
 
-define i8 @trunc_i16_i8(i16 signext %x) signext nounwind  {
+define void @trunc_i16_i8(i16 signext %x, i8* %p) nounwind  {
 	%tmp1 = trunc i16 %x to i8
-	ret i8 %tmp1
+	store i8 %tmp1, i8* %p
+	ret void
 }
 
-define i8 @shl_i8(i8 %a, i8 %c) nounwind {
-       %tmp = shl i8 %a, %c
-       ret i8 %tmp
+define void @shl_i8(i8 %a, i8 %c, i8* %p) nounwind {
+  %tmp = shl i8 %a, %c
+  store i8 %tmp, i8* %p
+  ret void
 }
 
-define i8 @mul_i8(i8 %a) nounwind {
-       %tmp = mul i8 %a, 17
-       ret i8 %tmp
+define void @mul_i8(i8 %a, i8* %p) nounwind {
+  %tmp = mul i8 %a, 17
+  store i8 %tmp, i8* %p
+  ret void
 }
 
 define void @load_store_i1(i1* %p, i1* %q) nounwind {
diff --git a/test/CodeGen/X86/fold-xmm-zero.ll b/test/CodeGen/X86/fold-xmm-zero.ll
new file mode 100644
index 0000000..b4eeb40
--- /dev/null
+++ b/test/CodeGen/X86/fold-xmm-zero.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx10.6.7 -mattr=+sse2 | FileCheck %s
+
+; Simple test to make sure folding for special constants (like float zero)
+; isn't completely broken.
+
+; CHECK: divss	LCPI0
+
+%0 = type { float, float, float, float, float, float, float, float }
+
+define void @f() nounwind ssp {
+entry:
+  %0 = tail call %0 asm sideeffect "foo", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00) nounwind
+  %asmresult = extractvalue %0 %0, 0
+  %asmresult8 = extractvalue %0 %0, 1
+  %asmresult9 = extractvalue %0 %0, 2
+  %asmresult10 = extractvalue %0 %0, 3
+  %asmresult11 = extractvalue %0 %0, 4
+  %asmresult12 = extractvalue %0 %0, 5
+  %asmresult13 = extractvalue %0 %0, 6
+  %asmresult14 = extractvalue %0 %0, 7
+  %div = fdiv float %asmresult, 0.000000e+00
+  %1 = tail call %0 asm sideeffect "bar", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div, float %asmresult8, float %asmresult9, float %asmresult10, float %asmresult11, float %asmresult12, float %asmresult13, float %asmresult14) nounwind
+  %asmresult24 = extractvalue %0 %1, 0
+  %asmresult25 = extractvalue %0 %1, 1
+  %asmresult26 = extractvalue %0 %1, 2
+  %asmresult27 = extractvalue %0 %1, 3
+  %asmresult28 = extractvalue %0 %1, 4
+  %asmresult29 = extractvalue %0 %1, 5
+  %asmresult30 = extractvalue %0 %1, 6
+  %asmresult31 = extractvalue %0 %1, 7
+  %div33 = fdiv float %asmresult24, 0.000000e+00
+  %2 = tail call %0 asm sideeffect "baz", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div33, float %asmresult25, float %asmresult26, float %asmresult27, float %asmresult28, float %asmresult29, float %asmresult30, float %asmresult31) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/hidden-vis-pic.ll b/test/CodeGen/X86/hidden-vis-pic.ll
index 217dba6..67be3d0 100644
--- a/test/CodeGen/X86/hidden-vis-pic.ll
+++ b/test/CodeGen/X86/hidden-vis-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim | FileCheck %s
 
 
 
@@ -26,7 +26,7 @@ entry:
 
 @.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
 
-define hidden void @func() nounwind ssp {
+define hidden void @func() nounwind ssp uwtable {
 entry:
   %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
   br label %return
@@ -37,7 +37,7 @@ return:                                           ; preds = %entry
 
 declare i32 @puts(i8*)
 
-define hidden i32 @main() nounwind ssp {
+define hidden i32 @main() nounwind ssp uwtable {
 entry:
   %retval = alloca i32                            ; <i32*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
new file mode 100644
index 0000000..72e17c0
--- /dev/null
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx  | FileCheck %s
+
+; Common "xorb al, al" instruction in the two successor blocks should be
+; moved to the entry block above the test + je.
+
+; rdar://9145558
+
+define zeroext i1 @t(i32 %c) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: xorb %al, %al
+; CHECK: test
+; CHECK: je
+  %tobool = icmp eq i32 %c, 0
+  br i1 %tobool, label %return, label %if.then
+
+if.then:
+; CHECK: callq
+  %call = tail call zeroext i1 (...)* @foo() nounwind
+  br label %return
+
+return:
+; CHECK: ret
+  %retval.0 = phi i1 [ %call, %if.then ], [ false, %entry ]
+  ret i1 %retval.0
+}
+
+declare zeroext i1 @foo(...)
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
new file mode 100644
index 0000000..29c5ae5
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -0,0 +1,17 @@
+; RUN: not llc -march x86 -regalloc=fast       < %s 2> %t1
+; RUN: not llc -march x86 -regalloc=basic      < %s 2> %t2
+; RUN: not llc -march x86 -regalloc=greedy     < %s 2> %t3
+; RUN: FileCheck %s < %t1
+; RUN: FileCheck %s < %t2
+; RUN: FileCheck %s < %t3
+
+; The register allocator must fail on this function, and it should print the
+; inline asm in the diagnostic.
+; CHECK: LLVM ERROR: Ran out of registers during register allocation!
+; CHECK: INLINEASM <es:hello world>
+
+define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp {
+entry:
+  tail call void asm sideeffect "hello world", "r,r,r,r,r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 507a328..4a98e63 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: not grep cmp %t
-; RUN: not grep xor %t
-; RUN: grep jne %t | count 1
-; RUN: grep jp %t | count 1
-; RUN: grep setnp %t | count 1
-; RUN: grep sete %t | count 1
-; RUN: grep and %t | count 1
-; RUN: grep cvt %t | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define i32 @isint_return(double %d) nounwind {
+; CHECK-NOT: xor
+; CHECK: cvt
   %i = fptosi double %d to i32
+; CHECK-NEXT: cvt
   %e = sitofp i32 %i to double
+; CHECK: cmpeqsd
   %c = fcmp oeq double %d, %e
+; CHECK-NEXT: movd
+; CHECK-NEXT: andl
   %z = zext i1 %c to i32
   ret i32 %z
 }
@@ -19,9 +17,14 @@ define i32 @isint_return(double %d) nounwind {
 declare void @foo()
 
 define void @isint_branch(double %d) nounwind {
+; CHECK: cvt
   %i = fptosi double %d to i32
+; CHECK-NEXT: cvt
   %e = sitofp i32 %i to double
+; CHECK: ucomisd
   %c = fcmp oeq double %d, %e
+; CHECK-NEXT: jne
+; CHECK-NEXT: jp
   br i1 %c, label %true, label %false
 true:
   call void @foo()
diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll
index 5bc4f7e..09c1c07 100644
--- a/test/CodeGen/X86/lsr-overflow.ll
+++ b/test/CodeGen/X86/lsr-overflow.ll
@@ -25,3 +25,21 @@ __ABContainsLabel.exit:
   %cmp = icmp eq i64 %indvar, 9223372036854775807
   ret i1 %cmp
 }
+
+define void @func_37() noreturn nounwind readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.inc8 ]
+  %sub.i = add i64 undef, %indvar
+  %cmp.i = icmp eq i64 %sub.i, -9223372036854775808
+  br i1 undef, label %for.inc8, label %for.cond4
+
+for.cond4:                                        ; preds = %for.cond4, %for.body
+  br label %for.cond4
+
+for.inc8:                                         ; preds = %for.body
+  %indvar.next = add i64 %indvar, 1
+  br label %for.body
+}
diff --git a/test/CodeGen/X86/movntdq-no-avx.ll b/test/CodeGen/X86/movntdq-no-avx.ll
new file mode 100644
index 0000000..8b7e6ef
--- /dev/null
+++ b/test/CodeGen/X86/movntdq-no-avx.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; Test that we produce a movntdq, not a vmovntdq
+; CHECK-NOT: vmovntdq
+
+define void @test(<2 x i64>* nocapture %a, <2 x i64> %b) nounwind optsize {
+entry:
+  store <2 x i64> %b, <2 x i64>* %a, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
new file mode 100644
index 0000000..1d09535
--- /dev/null
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
+; CHECK: movntps
+  %cast = bitcast i8* %B to <4 x float>*
+  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+; CHECK: movntdq
+  %cast1 = bitcast i8* %B to <2 x i64>*
+  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: movntpd
+  %cast2 = bitcast i8* %B to <2 x double>*
+  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e35eb70..e42aa9d 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -41,14 +41,13 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;      CHECK:         jle
 ;  CHECK-NOT:         cmov
-;      CHECK:         xorl    {{%edi, %edi|%ecx, %ecx}}
+;      CHECK:         xorl    {{%edi, %edi|%ecx, %ecx|%eax, %eax}}
 ; CHECK-NEXT:         align
 ; CHECK-NEXT: BB1_2:
-; CHECK-NEXT:         callq
+; CHECK:              callq
 ; CHECK-NEXT:         incl    [[BX:%[a-z0-9]+]]
 ; CHECK-NEXT:         cmpl    [[R14:%[a-z0-9]+]], [[BX]]
-; CHECK-NEXT:         movq    %rax, %r{{di|cx}}
-; CHECK-NEXT:         jl
+; CHECK:              jl
 
 define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
 entry:
diff --git a/test/CodeGen/X86/peep-setb.ll b/test/CodeGen/X86/peep-setb.ll
new file mode 100644
index 0000000..0bab789
--- /dev/null
+++ b/test/CodeGen/X86/peep-setb.ll
@@ -0,0 +1,82 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+define i8 @test1(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = zext i1 %cmp to i8
+  %add = add i8 %cond, %b
+  ret i8 %add
+; CHECK: test1:
+; CHECK: adcb $0
+}
+
+define i32 @test2(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  %add = add i32 %cond, %b
+  ret i32 %add
+; CHECK: test2:
+; CHECK: adcl $0
+}
+
+define i64 @test3(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = zext i1 %cmp to i64
+  %add = add i64 %conv, %b
+  ret i64 %add
+; CHECK: test3:
+; CHECK: adcq $0
+}
+
+define i8 @test4(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = zext i1 %cmp to i8
+  %sub = sub i8 %b, %cond
+  ret i8 %sub
+; CHECK: test4:
+; CHECK: sbbb $0
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  %sub = sub i32 %b, %cond
+  ret i32 %sub
+; CHECK: test5:
+; CHECK: sbbl $0
+}
+
+define i64 @test6(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = zext i1 %cmp to i64
+  %sub = sub i64 %b, %conv
+  ret i64 %sub
+; CHECK: test6:
+; CHECK: sbbq $0
+}
+
+define i8 @test7(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = sext i1 %cmp to i8
+  %sub = sub i8 %b, %cond
+  ret i8 %sub
+; CHECK: test7:
+; CHECK: adcb $0
+}
+
+define i32 @test8(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = sext i1 %cmp to i32
+  %sub = sub i32 %b, %cond
+  ret i32 %sub
+; CHECK: test8:
+; CHECK: adcl $0
+}
+
+define i64 @test9(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = sext i1 %cmp to i64
+  %sub = sub i64 %b, %conv
+  ret i64 %sub
+; CHECK: test9:
+; CHECK: adcq $0
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 13e804d..02c519f 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 4
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR2659
 
 define i32 @binomial(i32 %n, i32 %k) nounwind {
@@ -12,7 +12,8 @@ forcond.preheader:		; preds = %entry
 
 ifthen:		; preds = %entry
 	ret i32 0
-
+; CHECK: forbody
+; CHECK-NOT: mov
 forbody:		; preds = %forbody, %forcond.preheader
 	%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]		; <i32> [#uses=3]
 	%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index f23669e..4162015 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -join-physregs | FileCheck %s
 ; rdar://5571034
 
+; This requires physreg joining, %vreg13 is live everywhere:
+; 304L		%CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13
+; 320L		%vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19
+; 336L		%vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
+
 define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
 ; CHECK: foo:
 entry:
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index bf5229a..d8ed4c0 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t
 ; RUN: grep pmul %t | count 12
 ; RUN: grep mov %t | count 11
 
+; The f() arguments in %xmm0 and %xmm1 cause an extra movdqa without -join-physregs.
+
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
         ret <4 x i32> %A
diff --git a/test/CodeGen/X86/pr10068.ll b/test/CodeGen/X86/pr10068.ll
new file mode 100644
index 0000000..8829c5d
--- /dev/null
+++ b/test/CodeGen/X86/pr10068.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86
+
+define void @foobar() {
+entry:
+  %sub.i = trunc i64 undef to i32
+  %shr80.i = ashr i32 %sub.i, 16
+  %add82.i = add nsw i32 %shr80.i, 1
+  %notlhs.i = icmp slt i32 %shr80.i, undef
+  %notrhs.i = icmp sgt i32 %add82.i, -1
+  %or.cond.not.i = and i1 %notrhs.i, %notlhs.i
+  %cmp154.i = icmp slt i32 0, undef
+  %or.cond406.i = and i1 %or.cond.not.i, %cmp154.i
+  %or.cond406.not.i = xor i1 %or.cond406.i, true
+  %or.cond407.i = or i1 undef, %or.cond406.not.i
+  br i1 %or.cond407.i, label %if.then158.i, label %if.end163.i
+
+if.then158.i:
+  ret void
+
+if.end163.i:                                      ; preds = %if.end67.i
+  ret void
+}
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index ef0f9ea..5dab5c9 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 -disable-branch-fold | FileCheck %s
 ; PR2659
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -19,7 +18,11 @@ forcond.preheader:              ; preds = %entry
 ; CHECK-NOT: xorl
 ; CHECK-NOT: movl
 ; CHECK-NOT: LBB
-; CHECK: je
+; CHECK: jne
+
+; There should be no moves required in the for loop body.
+; CHECK: %forbody
+; CHECK-NOT: mov
 
 ifthen:         ; preds = %entry
   ret i32 0
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
index 9b251f5..ba92c77 100644
--- a/test/CodeGen/X86/pr9127.ll
+++ b/test/CodeGen/X86/pr9127.ll
@@ -10,4 +10,4 @@ entry:
 }
 
 ; test that the load is folded.
-; CHECK: ucomisd	(%{{rdi|rdx}}), %xmm0
+; CHECK: cmpeqsd	(%{{rdi|rdx}}), %xmm0
diff --git a/test/CodeGen/X86/pr9743.ll b/test/CodeGen/X86/pr9743.ll
index 8feccd9..6597c23 100644
--- a/test/CodeGen/X86/pr9743.ll
+++ b/test/CodeGen/X86/pr9743.ll
@@ -9,9 +9,9 @@ define void @f() {
 ; CHECK-NEXT: :
 ; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT: :
-; CHECK-NEXT:  .cfi_offset 6, -16
+; CHECK-NEXT:  .cfi_offset %rbp, -16
 ; CHECK-NEXT:  movq    %rsp, %rbp
 ; CHECK-NEXT: :
-; CHECK-NEXT:  .cfi_def_cfa_register 6
+; CHECK-NEXT:  .cfi_def_cfa_register %rbp
 ; CHECK-NEXT:  popq    %rbp
 ; CHECK-NEXT:  ret
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 04b57dd..865e147 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s
 ; rdar://6602459
 
 @g_v1di = external global <1 x i64>
@@ -8,19 +8,32 @@ entry:
 	%call = call <1 x i64> @return_v1di()		; <<1 x i64>> [#uses=0]
 	store <1 x i64> %call, <1 x i64>* @g_v1di
         ret void
+; CHECK: t1:
+; CHECK: callq
+; CHECK-NEXT: movq	_g_v1di
+; CHECK-NEXT: movq	%rax,
 }
 
 declare <1 x i64> @return_v1di()
 
 define <1 x i64> @t2() nounwind {
 	ret <1 x i64> <i64 1>
+; CHECK: t2:
+; CHECK: movl	$1
+; CHECK-NEXT: ret
 }
 
 define <2 x i32> @t3() nounwind {
 	ret <2 x i32> <i32 1, i32 0>
+; CHECK: t3:
+; CHECK: movl $1
+; CHECK: movd {{.*}}, %xmm0
 }
 
 define double @t4() nounwind {
 	ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
+; CHECK: t4:
+; CHECK: movl $1
+; CHECK: movd {{.*}}, %xmm0
 }
 
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
index 4a9c1ba..aa2f0af 100644
--- a/test/CodeGen/X86/setoeq.ll
+++ b/test/CodeGen/X86/setoeq.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86  | grep set | count 2
-; RUN: llc < %s -march=x86  | grep and
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define zeroext i8 @t(double %x) nounwind readnone {
 entry:
@@ -7,5 +6,16 @@ entry:
 	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
 	%2 = fcmp oeq double %1, %x		; <i1> [#uses=1]
 	%retval12 = zext i1 %2 to i8		; <i8> [#uses=1]
+; CHECK: cmpeqsd
+	ret i8 %retval12
+}
+
+define zeroext i8 @u(double %x) nounwind readnone {
+entry:
+	%0 = fptosi double %x to i32		; <i32> [#uses=1]
+	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = fcmp une double %1, %x		; <i1> [#uses=1]
+	%retval12 = zext i1 %2 to i8		; <i8> [#uses=1]
+; CHECK: cmpneqsd
 	ret i8 %retval12
 }
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll
new file mode 100644
index 0000000..24ba1fc
--- /dev/null
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define i64 @test(i64 %A) {
+; CHECK: @test
+; CHECK: shrq $54
+; CHECK: andq $1020
+; CHECK: ret
+    %B = lshr i64 %A, 56
+    %C = shl i64 %B, 2
+    ret i64 %C
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index de2a81e8..4a98efb 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -198,7 +198,7 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
 
 ; rdar://r7717598
 %struct.ns = type { i32, i32 }
-%struct.cp = type { float, float }
+%struct.cp = type { float, float, float, float, float }
 
 define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
 ; 32: t13:
@@ -229,7 +229,7 @@ entry:
 ; 64: t14:
 ; 64: movq 32(%rdi)
 ; 64-NOT: movq 16(%rdi)
-; 64: jmpq *16(%rdi)
+; 64: jmpq *16({{%rdi|%rax}})
   %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
   %1 = load void ()** %0, align 8                 ; <void ()*> [#uses=2]
   %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1]
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
deleted file mode 100644
index 7c23adb..0000000
--- a/test/CodeGen/X86/smul-with-overflow-2.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 | grep mul | count 1
-; RUN: llc < %s -march=x86 | grep add | count 3
-
-define i32 @t1(i32 %a, i32 %b) nounwind readnone {
-entry:
-        %tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
-}
-
-define i32 @t2(i32 %a, i32 %b) nounwind readnone {
-entry:
-        %tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
-}
-
-declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
deleted file mode 100644
index 49c31f5..0000000
--- a/test/CodeGen/X86/smul-with-overflow-3.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep {jno} | count 1
-
-@ok = internal constant [4 x i8] c"%d\0A\00"
-@no = internal constant [4 x i8] c"no\0A\00"
-
-define i1 @func1(i32 %v1, i32 %v2) nounwind {
-entry:
-  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
-  %sum = extractvalue {i32, i1} %t, 0
-  %obit = extractvalue {i32, i1} %t, 1
-  br i1 %obit, label %overflow, label %normal
-
-overflow:
-  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
-  ret i1 false
-
-normal:
-  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
-  ret i1 true
-}
-
-declare i32 @printf(i8*, ...) nounwind
-declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 6d125e4..7c2e247 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
 
-define i1 @func1(i32 %v1, i32 %v2) nounwind {
+define i1 @test1(i32 %v1, i32 %v2) nounwind {
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %sum = extractvalue {i32, i1} %t, 0
@@ -17,7 +17,53 @@ normal:
 overflow:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
+; CHECK: test1:
+; CHECK: imull
+; CHECK-NEXT: jo
+}
+
+define i1 @test2(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+; CHECK: test2:
+; CHECK: imull
+; CHECK-NEXT: jno
 }
 
 declare i32 @printf(i8*, ...) nounwind
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test3:
+; CHECK: addl
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test4:
+; CHECK: addl
+; CHECK: mull
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 348121a..ff0af25 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
 
 ; Some of these patterns can be matched as SSE min or max. Some of
 ; then can be matched provided that the operands are swapped.
@@ -12,6 +12,9 @@
 ; y_ : use -0.0 instead of %y
 ; _inverse : swap the arms of the select.
 
+; Some of these tests depend on -join-physregs commuting instructions to
+; eliminate copies.
+
 ; CHECK:      ogt:
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8e72f13..8c2e58d 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -62,11 +62,10 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
 	ret <8 x i16> %tmp
 ; X64: t4:
-; X64: 	pextrw	$7, %xmm0, %eax
-; X64: 	pshufhw	$100, %xmm0, %xmm1
-; X64: 	pinsrw	$1, %eax, %xmm1
-; X64: 	pextrw	$1, %xmm0, %eax
-; X64: 	movdqa	%xmm1, %xmm0
+; X64: 	pextrw	$7, [[XMM0:%xmm[0-9]+]], %eax
+; X64: 	pshufhw	$100, [[XMM0]], [[XMM1:%xmm[0-9]+]]
+; X64: 	pinsrw	$1, %eax, [[XMM1]]
+; X64: 	pextrw	$1, [[XMM0]], %eax
 ; X64: 	pinsrw	$4, %eax, %xmm0
 ; X64: 	ret
 }
@@ -251,13 +250,13 @@ entry:
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		pinsrw	$0, %eax, %xmm1
-; X64: 		pextrw	$8, %xmm0, %eax
-; X64: 		pinsrw	$1, %eax, %xmm1
-; X64: 		pextrw	$1, %xmm1, %ecx
-; X64: 		movd	%xmm1, %edx
-; X64: 		pinsrw	$0, %edx, %xmm1
-; X64: 		pinsrw	$1, %eax, %xmm0
+; X64: 		pinsrw	$0, %eax, [[X1:%xmm[0-9]+]]
+; X64: 		pextrw	$8, [[X0:%xmm[0-9]+]], %eax
+; X64: 		pinsrw	$1, %eax, [[X1]]
+; X64: 		pextrw	$1, [[X1]], %ecx
+; X64: 		movd	[[X1]], %edx
+; X64: 		pinsrw	$0, %edx, %xmm
+; X64: 		pinsrw	$1, %eax, %xmm
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
index 1723909..c787523 100644
--- a/test/CodeGen/X86/sse42.ll
+++ b/test/CodeGen/X86/sse42.ll
@@ -1,38 +1,39 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
 
-declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
-declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
-declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
 
-define i32 @crc32_8(i32 %a, i8 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
-; X32: _crc32_8:
+; X32: _crc32_32_8:
 ; X32:     crc32b   8(%esp), %eax
 
-; X64: _crc32_8:
-; X64:     crc32b   %sil, %eax
+; X64: _crc32_32_8:
+; X64:     crc32b   %sil,
 }
 
 
-define i32 @crc32_16(i32 %a, i16 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
-; X32: _crc32_16:
+; X32: _crc32_32_16:
 ; X32:     crc32w   8(%esp), %eax
 
-; X64: _crc32_16:
-; X64:     crc32w   %si, %eax
+; X64: _crc32_32_16:
+; X64:     crc32w   %si,
 }
 
 
-define i32 @crc32_32(i32 %a, i32 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
-; X32: _crc32_32:
+; X32: _crc32_32_32:
 ; X32:     crc32l   8(%esp), %eax
 
-; X64: _crc32_32:
-; X64:     crc32l   %esi, %eax
+; X64: _crc32_32_32:
+; X64:     crc32l   %esi,
 }
+
diff --git a/test/CodeGen/X86/sse42_64.ll b/test/CodeGen/X86/sse42_64.ll
new file mode 100644
index 0000000..8b3a69b
--- /dev/null
+++ b/test/CodeGen/X86/sse42_64.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+  ret i64 %tmp
+
+; X64: _crc32_64_8:
+; X64:     crc32b   %sil,
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+  ret i64 %tmp
+
+; X64: _crc32_64_64:
+; X64:     crc32q   %rsi,
+}
+
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 02399c4..a57fa58 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s
 ; CHECK: fail
 ; CHECK-NOT: fail
 
@@ -117,7 +117,16 @@ define <2 x double> @d8(<2 x double> %f) {
   ret <2 x double> %t
 }
 
-; This one should fail to fuse.
+; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead
+; it produces:
+;   callq	test_vd
+;   movapd	(%rsp), %xmm1           # 16-byte Reload
+;   hsubpd	%xmm0, %xmm1
+;   movapd	%xmm1, %xmm0
+;   addq	$24, %rsp
+;   ret
+; RABasic still tries to fold this one.
+
 define <2 x double> @z0(<2 x double> %f) {
   %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 77710ad..d6c16ca 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -412,9 +412,9 @@ return:
 ; can fall-through into the ret and the other side has to branch anyway.
 
 ; CHECK: TESTE:
-; CHECK: imulq
-; CHECK-NEXT: LBB8_2:
-; CHECK-NEXT: ret
+; CHECK: ret
+; CHECK-NOT: ret
+; CHECK: size TESTE
 
 define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 060ce0f..c18c7aa 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
-; CHECK: subq  ${{24|72}}, %rsp
+; CHECK: subq  ${{24|72|80}}, %rsp
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index c997661..84fcbc7 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -12,3 +12,27 @@ define i1 @a(i32 %x) zeroext nounwind {
 ; CHECK: movzbl	%al, %eax
 ; CHECK: ret
 }
+
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test2:
+; CHECK: addl
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test3:
+; CHECK: addl
+; CHECK: mull
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index 8fbbd39..a0448ec 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -7,10 +7,10 @@
 ; Use the flags on the add.
 
 ; CHECK: test1:
-;      CHECK: addl    (%r[[A0:di|cx]]), {{%esi|%edx}}
-; CHECK-NEXT: movl    {{%edx|%r8d}}, %eax
-; CHECK-NEXT: cmovnsl {{%ecx|%r9d}}, %eax
-; CHECK-NEXT: ret
+;     CHECK: addl
+; CHECK-NOT: test
+;     CHECK: cmovnsl
+;     CHECK: ret
 
 define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
 	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
@@ -42,7 +42,7 @@ false:
 ; Do use the flags result of the and here, since the and has another use.
 
 ; CHECK: test3:
-;      CHECK: andl    $16, %e[[A0]]
+;      CHECK: andl    $16, %e
 ; CHECK-NEXT: jne
 
 define void @test3(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
new file mode 100644
index 0000000..73d80eb
--- /dev/null
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00"
+@sel = external global i8*
+@sel3 = external global i8*
+@sel4 = external global i8*
+@sel5 = external global i8*
+@sel6 = external global i8*
+@sel7 = external global i8*
+
+; X64: @foo
+; X64: jmp
+; WIN64: @foo
+; WIN64: callq
+define void @foo(i64 %arg) nounwind optsize ssp noredzone {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+  ret void
+}
+
+declare i32 @printf(i8*, ...) optsize noredzone
+
+; X64: @bar
+; X64: jmp
+; WIN64: @bar
+; WIN64: jmp
+define void @bar(i64 %arg) nounwind optsize ssp noredzone {
+entry:
+  tail call void @bar2(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+  ret void
+}
+
+declare void @bar2(i8*, i64) optsize noredzone
+
+; X64: @foo2
+; X64: jmp
+; WIN64: @foo2
+; WIN64: callq
+define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
+entry:
+  %tmp1 = load i8** @sel, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x2(i8*, i8*, ...) optsize noredzone
+
+; X64: @foo6
+; X64: jmp
+; WIN64: @foo6
+; WIN64: callq
+define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone
+
+; X64: @foo7
+; X64: callq
+; WIN64: @foo7
+; WIN64: callq
+define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp6 = load i8** @sel7, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone
+
+; X64: @foo8
+; X64: callq
+; WIN64: @foo8
+; WIN64: callq
+define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
+  ret i8* %call
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index dab5dd1..f487654 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
-; RUN: grep extractps   %t | count 1
-; RUN: grep pextrd      %t | count 1
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
+; RUN: not grep extractps   %t
+; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
-; RUN: not grep movss   %t
+; RUN: grep movss   %t | count 2
 
 define void @t1(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index b013730..2c8796b 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
-; RUN: grep movss    %t | count 3
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: grep movss    %t | count 4
 ; RUN: grep movhlps  %t | count 1
-; RUN: grep pshufd   %t | count 1
+; RUN: not grep pshufd   %t 
 ; RUN: grep unpckhpd %t | count 1
 
 define void @test1(<4 x float>* %F, float* %f) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 2ee87fe..06f38ed 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,8 +1,9 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
 
+; sse:  t1:
+; sse2: t1:
 define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
-; sse: movaps
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
@@ -10,6 +11,8 @@ define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
         ret <4 x float> %tmp1
 }
 
+; sse:  t2:
+; sse2: t2:
 define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 ; sse: shufps
 ; sse2: pshufd
@@ -18,8 +21,9 @@ define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
+; sse:  t3:
+; sse2: t3:
 define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
-; sse: movaps
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
@@ -27,7 +31,10 @@ define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
+; sse:  t4:
+; sse2: t4:
 define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
+
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
index 39e7d71..fe7fa2f 100644
--- a/test/CodeGen/X86/vec_uint_to_fp.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=sandybridge | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
 
 ; Test that we are not lowering uinttofp to scalars
 define <4 x float> @test1(<4 x i32> %A) nounwind {
diff --git a/test/CodeGen/X86/visibility2.ll b/test/CodeGen/X86/visibility2.ll
new file mode 100644
index 0000000..72ea733
--- /dev/null
+++ b/test/CodeGen/X86/visibility2.ll
@@ -0,0 +1,18 @@
+; This test case ensures that when the visibility of a global declaration is 
+; emitted they are not treated as definitions.  Test case for r132825.
+; Fixes <rdar://problem/9429892>.
+;
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+
+@foo_private_extern_str = external hidden global i8*
+
+define void @foo1() nounwind ssp {
+entry:
+  %tmp = load i8** @foo_private_extern_str, align 8
+  call void @foo3(i8* %tmp)
+  ret void
+}
+
+declare void @foo3(i8*)
+
+; CHECK-NOT: .private_extern
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index 82c8252..c91627c 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -4,15 +4,15 @@
 
 ; Both loads should happen before either store.
 
-; CHECK: movl  (%rdi), %eax
-; CHECK: movl  (%rsi), %ecx
-; CHECK: movl  %ecx, (%rdi)
-; CHECK: movl  %eax, (%rsi)
+; CHECK: movl  (%rdi), %[[R1:...]]
+; CHECK: movl  (%rsi), %[[R2:...]]
+; CHECK: movl  %[[R2]], (%rdi)
+; CHECK: movl  %[[R1]], (%rsi)
 
-; WIN64: movl  (%rcx), %eax
-; WIN64: movl  (%rdx), %esi
-; WIN64: movl  %esi, (%rcx)
-; WIN64: movl  %eax, (%rdx)
+; WIN64: movl  (%rcx), %[[R1:...]]
+; WIN64: movl  (%rdx), %[[R2:...]]
+; WIN64: movl  %[[R2]], (%rcx)
+; WIN64: movl  %[[R1]], (%rdx)
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index cbd38da..e39d007 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
+; Passing the same value in two registers creates a false interference that
+; only -join-physregs resolves. It could also be handled by a parallel copy.
+
 define i64 @foo(i64 %n, i64 %x) nounwind {
 entry:
 
@@ -40,9 +43,9 @@ entry:
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, %rax
 
-; EFI: leaq  15(%rcx), [[R1:%r..]]
+; EFI: leaq  15(%rcx), [[R1:%r.*]]
 ; EFI: andq  $-16, [[R1]]
-; EFI: movq  %rsp, [[R64:%r..]]
+; EFI: movq  %rsp, [[R64:%r.*]]
 ; EFI: subq  [[R1]], [[R64]]
 ; EFI: movq  [[R64]], %rsp
 
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 2465f23..07ccb23 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -39,7 +39,7 @@ define void @ccc(i64 %x) nounwind {
 
 ; This requires a mov and a 64-bit and.
 ; CHECK: ddd:
-; CHECK: movabsq $4294967296, %rax
+; CHECK: movabsq $4294967296, %r
 ; CHECK: andq %rax, %rdi
 
 define void @ddd(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/x86-64-extend-shift.ll b/test/CodeGen/X86/x86-64-extend-shift.ll
index 6852785..6ebaeee 100644
--- a/test/CodeGen/X86/x86-64-extend-shift.ll
+++ b/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -2,7 +2,7 @@
 ; Formerly there were two shifts.
 
 define i64 @baz(i32 %A) nounwind {
-; CHECK:  shlq  $49, %rax
+; CHECK:  shlq  $49, %r
         %tmp1 = shl i32 %A, 17
         %tmp2 = zext i32 %tmp1 to i64
         %tmp3 = shl i64 %tmp2, 32
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
new file mode 100644
index 0000000..fdf68f9
--- /dev/null
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+; Splat patterns below
+
+
+define <4 x i32> @shl4(<4 x i32> %A) nounwind {
+entry:
+; CHECK:      shl4
+; CHECK:      pslld
+; CHECK-NEXT: pslld
+  %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
+  %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
+  %K = xor <4 x i32> %B, %C
+  ret <4 x i32> %K
+}
+
+define <4 x i32> @shr4(<4 x i32> %A) nounwind {
+entry:
+; CHECK:      shr4
+; CHECK:      psrld
+; CHECK-NEXT: psrld
+  %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
+  %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
+  %K = xor <4 x i32> %B, %C
+  ret <4 x i32> %K
+}
+
+define <4 x i32> @sra4(<4 x i32> %A) nounwind {
+entry:
+; CHECK:      sra4
+; CHECK:      psrad
+; CHECK-NEXT: psrad
+  %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
+  %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
+  %K = xor <4 x i32> %B, %C
+  ret <4 x i32> %K
+}
+
+define <2 x i64> @shl2(<2 x i64> %A) nounwind {
+entry:
+; CHECK:      shl2
+; CHECK:      psllq
+; CHECK-NEXT: psllq
+  %B = shl <2 x i64> %A,  < i64 2, i64 2>
+  %C = shl <2 x i64> %A,  < i64 9, i64 9>
+  %K = xor <2 x i64> %B, %C
+  ret <2 x i64> %K
+}
+
+define <2 x i64> @shr2(<2 x i64> %A) nounwind {
+entry:
+; CHECK:      shr2
+; CHECK:      psrlq
+; CHECK-NEXT: psrlq
+  %B = lshr <2 x i64> %A,  < i64 8, i64 8>
+  %C = lshr <2 x i64> %A,  < i64 1, i64 1>
+  %K = xor <2 x i64> %B, %C
+  ret <2 x i64> %K
+}
+
+
+define <8 x i16> @shl8(<8 x i16> %A) nounwind {
+entry:
+; CHECK:      shl8
+; CHECK:      psllw
+; CHECK-NEXT: psllw
+  %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %K = xor <8 x i16> %B, %C
+  ret <8 x i16> %K
+}
+
+define <8 x i16> @shr8(<8 x i16> %A) nounwind {
+entry:
+; CHECK:      shr8
+; CHECK:      psrlw
+; CHECK-NEXT: psrlw
+  %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %K = xor <8 x i16> %B, %C
+  ret <8 x i16> %K
+}
+
+define <8 x i16> @sra8(<8 x i16> %A) nounwind {
+entry:
+; CHECK:      sra8
+; CHECK:      psraw
+; CHECK-NEXT: psraw
+  %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %K = xor <8 x i16> %B, %C
+  ret <8 x i16> %K
+}
+
+; non splat test
+
+
+define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
+entry:
+; CHECK: sll8_nosplat
+; CHECK-NOT: psll
+; CHECK-NOT: psll
+  %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
+  %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
+  %K = xor <8 x i16> %B, %C
+  ret <8 x i16> %K
+}
+
+
+define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
+entry:
+; CHECK: shr2_nosplat
+; CHECK-NOT:  psrlq
+; CHECK-NOT:  psrlq
+  %B = lshr <2 x i64> %A,  < i64 8, i64 1>
+  %C = lshr <2 x i64> %A,  < i64 1, i64 0>
+  %K = xor <2 x i64> %B, %C
+  ret <2 x i64> %K
+}
+
+
+; Other shifts
+
+define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
+entry:
+; CHECK: shl2_other
+; CHECK-not:      psllq
+  %B = shl <2 x i32> %A,  < i32 2, i32 2>
+  %C = shl <2 x i32> %A,  < i32 9, i32 9>
+  %K = xor <2 x i32> %B, %C
+  ret <2 x i32> %K
+}
+
+define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
+entry:
+; CHECK: shr2_other
+; CHECK-NOT:      psrlq
+  %B = lshr <2 x i32> %A,  < i32 8, i32 8>
+  %C = lshr <2 x i32> %A,  < i32 1, i32 1>
+  %K = xor <2 x i32> %B, %C
+  ret <2 x i32> %K
+}
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index b90d81a..178c59d 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -29,9 +29,8 @@ entry:
         ret i32 %tmp4
         
 ; X64: test3:
-; X64:	notl	[[A1:%esi|%edx]]
-; X64:	andl	[[A0:%edi|%ecx]], [[A1]]
-; X64:	movl	[[A1]], %eax
+; X64:	notl
+; X64:	andl
 ; X64:	shrl	%eax
 ; X64:	ret
 
@@ -139,7 +138,7 @@ entry:
   %t2 = add i32 %t1, -1
   ret i32 %t2
 ; X64: test8:
-; X64:   notl %eax
+; X64:   notl {{%eax|%edi|%ecx}}
 ; X32: test8:
 ; X32:   notl %eax
 }
diff --git a/test/CodeGen/XCore/bitrev.ll b/test/CodeGen/XCore/bitrev.ll
deleted file mode 100644
index 09202d3..0000000
--- a/test/CodeGen/XCore/bitrev.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep bitrev %t1.s | count 1 
-declare i32 @llvm.xcore.bitrev(i32)
-
-define i32 @test(i32 %val) {
-	%result = call i32 @llvm.xcore.bitrev(i32 %val)
-	ret i32 %result
-}
diff --git a/test/CodeGen/XCore/misc-intrinsics.ll b/test/CodeGen/XCore/misc-intrinsics.ll
new file mode 100644
index 0000000..f504a2e
--- /dev/null
+++ b/test/CodeGen/XCore/misc-intrinsics.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+%0 = type { i32, i32 }
+
+declare i32 @llvm.xcore.bitrev(i32)
+declare i32 @llvm.xcore.crc32(i32, i32, i32)
+declare %0 @llvm.xcore.crc8(i32, i32, i32)
+
+define i32 @bitrev(i32 %val) {
+; CHECK: bitrev:
+; CHECK: bitrev r0, r0
+	%result = call i32 @llvm.xcore.bitrev(i32 %val)
+	ret i32 %result
+}
+
+define i32 @crc32(i32 %crc, i32 %data, i32 %poly) {
+; CHECK: crc32:
+; CHECK: crc32 r0, r1, r2
+	%result = call i32 @llvm.xcore.crc32(i32 %crc, i32 %data, i32 %poly)
+	ret i32 %result
+}
+
+define %0 @crc8(i32 %crc, i32 %data, i32 %poly) {
+; CHECK: crc8:
+; CHECK: crc8 r0, r1, r1, r2
+	%result = call %0 @llvm.xcore.crc8(i32 %crc, i32 %data, i32 %poly)
+	ret %0 %result
+}
diff --git a/test/CodeGen/XCore/mul64.ll b/test/CodeGen/XCore/mul64.ll
index 77c6b42..3d373b1 100644
--- a/test/CodeGen/XCore/mul64.ll
+++ b/test/CodeGen/XCore/mul64.ll
@@ -9,7 +9,7 @@ entry:
 }
 ; CHECK: umul_lohi:
 ; CHECK: ldc [[REG:r[0-9]+]], 0
-; CHECK-NEXT: lmul r1, r0, r1, r0, [[REG]], [[REG]]
+; CHECK-NEXT: lmul {{.*}}, [[REG]], [[REG]]
 ; CHECK-NEXT: retsp 0
 
 define i64 @smul_lohi(i32 %a, i32 %b) {
@@ -23,9 +23,7 @@ entry:
 ; CHECK: ldc
 ; CHECK-NEXT: mov
 ; CHECK-NEXT: maccs
-; CHECK-NEXT: mov r0,
-; CHECK-NEXT: mov r1,
-; CHECK-NEXT: retsp 0
+; CHECK: retsp 0
 
 define i64 @mul64(i64 %a, i64 %b) {
 entry:
@@ -37,7 +35,6 @@ entry:
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: lmul
-; CHECK-NEXT: mov r0,
 
 define i64 @mul64_2(i64 %a, i32 %b) {
 entry:
@@ -50,4 +47,4 @@ entry:
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: add r1,
-; CHECK-NEXT: retsp 0
+; CHECK: retsp 0
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
new file mode 100644
index 0000000..d273d73
--- /dev/null
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -0,0 +1,18 @@
+; RUN: llc %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s
+
+; Test that we produce a .debug_frame, not an .eh_frame
+
+; CHECK: .cfi_sections .debug_frame
+
+define void @f() nounwind {
+entry:
+  ret void
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
diff --git a/test/DebugInfo/X86/dg.exp b/test/DebugInfo/X86/dg.exp
new file mode 100644
index 0000000..7b7bd4e
--- /dev/null
+++ b/test/DebugInfo/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/DebugInfo/X86/eh_symbol.ll b/test/DebugInfo/X86/eh_symbol.ll
new file mode 100644
index 0000000..a87afed
--- /dev/null
+++ b/test/DebugInfo/X86/eh_symbol.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=i386-apple-macosx -disable-cfi %s -o - | FileCheck %s
+
+; test that we don't produce foo.eh symbols is a debug_frame section.
+; CHECK-NOT: .globl	_f.eh
+
+define i32 @f() nounwind readnone optsize {
+entry:
+  ret i32 42
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 ()* @f, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
new file mode 100644
index 0000000..145649b
--- /dev/null
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
+
+; CHECK:      .section        .debug_line,"",@progbits
+; CHECK-NEXT: .Lsection_line:
+
+; CHECK:      .long   .Lsection_line          # DW_AT_stmt_list
+
+define void @f() {
+entry:
+  ret void
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
new file mode 100644
index 0000000..1c4456f
--- /dev/null
+++ b/test/DebugInfo/X86/subreg.ll
@@ -0,0 +1,27 @@
+; RUN: llc %s -mtriple=x86_64-pc-linux-gnu -O0 -o - | FileCheck %s
+
+; We are testing that a value in a 16 bit register gets reported as
+; being in its superregister.
+; FIXME: There should be a DW_OP_bit_piece too.
+
+; CHECK: .byte   80                      # DW_OP_reg0
+
+define i16 @f(i16 signext %zzz) nounwind {
+entry:
+  call void @llvm.dbg.value(metadata !{i16 %zzz}, i64 0, metadata !0)
+  %conv = sext i16 %zzz to i32, !dbg !7
+  %conv1 = trunc i32 %conv to i16
+  ret i16 %conv1
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 590081, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{null}
+!6 = metadata !{i32 589860, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 4, i32 22, metadata !8, null}
+!8 = metadata !{i32 589835, metadata !1, i32 3, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/pr9951.ll b/test/DebugInfo/pr9951.ll
new file mode 100644
index 0000000..7716cd7
--- /dev/null
+++ b/test/DebugInfo/pr9951.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple x86_64-apple-darwin10.0.0 -disable-cfi %s -o - | FileCheck %s
+
+define i32 @f() nounwind {
+entry:
+  ret i32 42
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @f, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+
+
+; CHECK:      _f:                                     ## @f
+; CHECK-NEXT: Ltmp0:
+
+; CHECK:      Ltmp9 = (Ltmp3-Ltmp2)-0
+; CHECK-NEXT:	.long	Ltmp9
+; CHECK-NEXT:	.quad	Ltmp0
diff --git a/test/FrontendC++/2006-11-06-StackTrace.cpp b/test/FrontendC++/2006-11-06-StackTrace.cpp
index b79c0bf..2813c36 100644
--- a/test/FrontendC++/2006-11-06-StackTrace.cpp
+++ b/test/FrontendC++/2006-11-06-StackTrace.cpp
@@ -1,7 +1,7 @@
 // This is a regression test on debug info to make sure that we can get a
 // meaningful stack trace from a C++ program.
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:    llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN:    llc --disable-cfi --disable-fp-elim -o %t.s -O0 -relocation-model=pic
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
 // RUN: echo {break DeepStack::deepest\nrun 17\nwhere\n} > %t.in 
diff --git a/test/FrontendC++/2006-11-30-Pubnames.cpp b/test/FrontendC++/2006-11-30-Pubnames.cpp
index 239d3f5..fc7beeb 100644
--- a/test/FrontendC++/2006-11-30-Pubnames.cpp
+++ b/test/FrontendC++/2006-11-30-Pubnames.cpp
@@ -1,7 +1,7 @@
 // This is a regression test on debug info to make sure that we can access 
 // qualified global names.
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:   llc --disable-fp-elim -o %t.s -O0
+// RUN:   llc -disable-cfi --disable-fp-elim -o %t.s -O0
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
 // RUN: %llvmdsymutil %t.exe 
diff --git a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
index e3616da..da09c0b 100644
--- a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
+++ b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -g %s -o - | llc -O0 -o %t.s
+// RUN: %llvmgcc -S -g %s -o - | llc --disable-cfi -O0 -o %t.s
 // RUN: %compile_c %t.s -o %t.o
 // PR4025
 
diff --git a/test/FrontendC++/2010-08-31-ByValArg.cpp b/test/FrontendC++/2010-08-31-ByValArg.cpp
index be0d354..4ccaabd 100644
--- a/test/FrontendC++/2010-08-31-ByValArg.cpp
+++ b/test/FrontendC++/2010-08-31-ByValArg.cpp
@@ -1,7 +1,7 @@
 // This regression test checks byval arguments' debug info.
 // Radar 8367011
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:    llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN:    llc --disable-cfi --disable-fp-elim -o %t.s -O0 -relocation-model=pic
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
 // RUN: echo {break get\nrun\np missing_arg.b} > %t.in 
diff --git a/test/FrontendC/2008-07-29-EHLabel.ll b/test/FrontendC/2008-07-29-EHLabel.ll
index 7577bc9..186eafa 100644
--- a/test/FrontendC/2008-07-29-EHLabel.ll
+++ b/test/FrontendC/2008-07-29-EHLabel.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | %llvmgcc -xassembler -c -o /dev/null -
+; RUN: llc -disable-cfi %s -o - | %llvmgcc -xassembler -c -o /dev/null -
 ; PR2609
 	%struct..0._11 = type { i32 }
 	%struct..1__pthread_mutex_s = type { i32, i32, i32, i32, i32, %struct..0._11 }
diff --git a/test/FrontendC/2009-02-17-BitField-dbg.c b/test/FrontendC/2009-02-17-BitField-dbg.c
index 80ccc4a..88d2cbb 100644
--- a/test/FrontendC/2009-02-17-BitField-dbg.c
+++ b/test/FrontendC/2009-02-17-BitField-dbg.c
@@ -1,6 +1,6 @@
 // Check bitfields.
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN: llc --disable-fp-elim -o 2009-02-17-BitField-dbg.s
+// RUN: llc -disable-cfi --disable-fp-elim -o 2009-02-17-BitField-dbg.s
 // RUN: %compile_c 2009-02-17-BitField-dbg.s -o 2009-02-17-BitField-dbg.o
 // RUN: echo {ptype mystruct} > %t2
 // RUN: gdb -q -batch -n -x %t2 2009-02-17-BitField-dbg.o | \
diff --git a/test/FrontendC/2010-01-05-LinkageName.c b/test/FrontendC/2010-01-05-LinkageName.c
index 9c1a215..279df03 100644
--- a/test/FrontendC/2010-01-05-LinkageName.c
+++ b/test/FrontendC/2010-01-05-LinkageName.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -O2 -S -g %s -o - | llc -o 2010-01-05-LinkageName.s -O0 
+// RUN: %llvmgcc -O2 -S -g %s -o - | llc -disable-cfi -o 2010-01-05-LinkageName.s -O0 
 // RUN: %compile_c 2010-01-05-LinkageName.s -o 2010-01-05-LinkageName.s
 
 struct tm {};
diff --git a/test/FrontendC/2010-01-14-StaticVariable.c b/test/FrontendC/2010-01-14-StaticVariable.c
index 80dd4d4..0635900 100644
--- a/test/FrontendC/2010-01-14-StaticVariable.c
+++ b/test/FrontendC/2010-01-14-StaticVariable.c
@@ -1,7 +1,7 @@
 // This is a regression test on debug info to make sure that llvm emitted
 // debug info does not crash gdb.
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:    llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN:    llc -disable-cfi --disable-fp-elim -o %t.s -O0 -relocation-model=pic
 // RUN: %compile_c %t.s -o %t.o
 // RUN: echo {quit\n} > %t.in 
 // RUN: gdb -q -batch -n -x %t.in %t.o > /dev/null
diff --git a/test/FrontendC/2010-02-16-DbgVarScope.c b/test/FrontendC/2010-02-16-DbgVarScope.c
index 1d912d0f..24910ad 100644
--- a/test/FrontendC/2010-02-16-DbgVarScope.c
+++ b/test/FrontendC/2010-02-16-DbgVarScope.c
@@ -1,5 +1,5 @@
 // RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:    llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN:    llc -disable-cfi --disable-fp-elim -o %t.s -O0 -relocation-model=pic
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
 // RUN: echo {break 24\nrun\np loc\n} > %t.in 
diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c
index 33b8770..ca7625f 100644
--- a/test/FrontendC/2010-05-18-asmsched.c
+++ b/test/FrontendC/2010-05-18-asmsched.c
@@ -3,8 +3,9 @@
 
 void foo(int x, int y) {
 // CHECK: bar
-// CHECK: movq  %r9, %r10
-// CHECK: movq  %rdi, %r9
+// CHECK-NOT: {{, %r9$}}
+// CHECK: movq  %r9,
+// CHECK: movq  {{.*}}, %r9
 // CHECK: bar
   register int lr9 asm("r9") = x;
   register int lr10 asm("r10") = y;
diff --git a/test/FrontendC/ARM/dg.exp b/test/FrontendC/ARM/dg.exp
new file mode 100644
index 0000000..df7d49e
--- /dev/null
+++ b/test/FrontendC/ARM/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] && [llvm_gcc_supports c] } {
+    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/FrontendC/ARM/inline-asm-multichar.c b/test/FrontendC/ARM/inline-asm-multichar.c
new file mode 100644
index 0000000..7e2eeef
--- /dev/null
+++ b/test/FrontendC/ARM/inline-asm-multichar.c
@@ -0,0 +1,11 @@
+// RUN: %llvmgcc -S -march=armv7a %s 
+
+// XFAIL: *
+// XTARGET: arm
+
+int t1() {
+  static float k = 1.0f;
+CHECK: call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"
+  __asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv,m" (k) : "s15");
+  return 0;
+}
diff --git a/test/FrontendC/pr4349.c b/test/FrontendC/pr4349.c
index 24acd9c..49c89e2 100644
--- a/test/FrontendC/pr4349.c
+++ b/test/FrontendC/pr4349.c
@@ -16,22 +16,22 @@ struct svar
 {
     void *ptr;
 };
-// CHECK: @svars1 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
+// CHECK: @svars1 = global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
 struct svar svars1[] =
 {
     { &((cpu.pc).w[0]) }
 };
-// CHECK: @svars2 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x i8]* bitcast (%struct.cpu* @cpu to [2 x i8]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) }]
+// CHECK: @svars2 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x i8]* bitcast (%struct.cpu* @cpu to [2 x i8]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) }]
 struct svar svars2[] =
 {
     { &((cpu.pc).b[0][1]) }
 };
-// CHECK: @svars3 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (i16* getelementptr ([2 x i16]* bitcast (%struct.cpu* @cpu to [2 x i16]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) to i8*) }]
+// CHECK: @svars3 = global [1 x %struct.svar] [%struct.svar { i8* bitcast (i16* getelementptr ([2 x i16]* bitcast (%struct.cpu* @cpu to [2 x i16]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) to i8*) }]
 struct svar svars3[] =
 {
     { &((cpu.pc).w[1]) }
 };
-// CHECK: @svars4 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x [2 x i8]]* bitcast (%struct.cpu* @cpu to [2 x [2 x i8]]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1, i{{[0-9]+}} 1) }]
+// CHECK: @svars4 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x [2 x i8]]* bitcast (%struct.cpu* @cpu to [2 x [2 x i8]]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1, i{{[0-9]+}} 1) }]
 struct svar svars4[] =
 {
     { &((cpu.pc).b[1][1]) }
diff --git a/test/FrontendC/struct-matching-constraint.c b/test/FrontendC/struct-matching-constraint.c
new file mode 100644
index 0000000..d002cdd
--- /dev/null
+++ b/test/FrontendC/struct-matching-constraint.c
@@ -0,0 +1,19 @@
+// RUN: %llvmgcc -S -march=armv7a %s 
+
+// XFAIL: *
+// XTARGET: arm
+
+typedef struct __simd128_uint16_t
+{
+  __neon_uint16x8_t val;
+} uint16x8_t;
+
+void b(uint16x8_t sat, uint16x8_t luma)
+{
+  __asm__("vmov.16 %1, %0   \n\t"
+                                           "vtrn.16 %0, %1   \n\t"
+   :"=w"(luma), "=w"(sat)
+   :"0"(luma)
+   );
+
+}
diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m
index 8ed7c24..825bbd7 100644
--- a/test/FrontendObjC/2009-08-17-DebugInfo.m
+++ b/test/FrontendObjC/2009-08-17-DebugInfo.m
@@ -1,6 +1,6 @@
 // This is a regression test on debug info to make sure that we can set a
 // breakpoint on a objective message.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llc -o %t.s -O0
+// RUN: %llvmgcc -S -O0 -g %s -o - | llc -disable-cfi -o %t.s -O0
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe -framework Foundation
 // RUN: echo {break randomFunc\n} > %t.in 
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index 50a2b70..f789441 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -270,6 +270,9 @@
 @ CHECK: msr  cpsr_fc, r0 @ encoding: [0x00,0xf0,0x29,0xe1]
         msr  cpsr_fc, r0
 
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x00,0xf0,0x29,0xe1]
+        msr  cpsr_all, r0
+
 @ CHECK: msr  cpsr_fsx, r0 @ encoding: [0x00,0xf0,0x2e,0xe1]
         msr  cpsr_fsx, r0
 
@@ -309,3 +312,6 @@
 @ CHECK: ldrexd  r0, r1, [r0] @ encoding: [0x9f,0x0f,0xb0,0xe1]
         ldrexd  r0, r1, [r0]
 
+@ CHECK: ssat16  r0, #7, r0 @ encoding: [0x30,0x0f,0xa6,0xe6]
+        ssat16  r0, #7, r0
+
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 0fe7c50..18061f5 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -9,10 +9,10 @@
 barf:                                   @ @barf
 @ BB#0:                                 @ %entry
 	movw	r0, :lower16:GOT-(.LPC0_2+8)
-	movt	r0, :upper16:GOT-(.LPC0_2+16)
+	movt	r0, :upper16:GOT-(.LPC0_2+8)
 .LPC0_2:
 @ ASM:          movw    r0, :lower16:(GOT-(.LPC0_2+8))
-@ ASM-NEXT:     movt    r0, :upper16:(GOT-(.LPC0_2+16))
+@ ASM-NEXT:     movt    r0, :upper16:(GOT-(.LPC0_2+8))
 
 @@ make sure that the text section fixups are sane too
 @ OBJ:                 '.text'
@@ -25,7 +25,7 @@ barf:                                   @ @barf
 @ OBJ-NEXT:            'sh_info', 0x00000000
 @ OBJ-NEXT:            'sh_addralign', 0x00000004
 @ OBJ-NEXT:            'sh_entsize', 0x00000000
-@ OBJ-NEXT:            '_section_data', 'f00f0fe3 ff0f4fe3'
+@ OBJ-NEXT:            '_section_data', 'f00f0fe3 f40f4fe3'
 
 @ OBJ:              Relocation 0x00000000
 @ OBJ-NEXT:         'r_offset', 0x00000000
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
new file mode 100644
index 0000000..6fce4038
--- /dev/null
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -0,0 +1,37 @@
+; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: FileCheck %s
+
+; FIXME: This file needs to be in .s form!
+; We wanna test relocatable thumb function call,
+; but ARMAsmParser cannot handle "bl foo(PLT)" yet
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32"
+target triple = "thumbv7-none--gnueabi"
+
+define void @foo() nounwind {
+entry:
+  ret void
+}
+
+define void @bar() nounwind {
+entry:
+  call void @foo()
+  ret void
+}
+
+
+; make sure that bl 0 <foo> (fff7feff) is correctly encoded
+; CHECK: '_section_data', '70470000 2de90048 fff7feff bde80088'
+
+;  Offset     Info    Type            Sym.Value  Sym. Name
+; 00000008  0000070a R_ARM_THM_CALL    00000001   foo
+; CHECK:           Relocation 0x00000000 
+; CHECK-NEXT:      'r_offset', 0x00000008
+; CHECK-NEXT:      'r_sym', 0x00000007
+; CHECK-NEXT:      'r_type', 0x0000000a
+
+; make sure foo is thumb function: bit 0 = 1
+; CHECK:           Symbol 0x00000007
+; CHECK-NEXT:      'foo'
+; CHECK-NEXT:      'st_value', 0x00000001
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
new file mode 100644
index 0000000..a1b3c31
--- /dev/null
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -0,0 +1,20 @@
+@@ test st_value bit 0 of thumb function
+@ RUN: llvm-mc %s -triple=thumbv7-linux-gnueabi -filetype=obj -o - | \
+@ RUN: elf-dump  | FileCheck %s
+	.syntax unified
+	.text
+	.globl	foo
+	.align	2
+	.type	foo,%function
+	.code	16
+	.thumb_func
+foo:
+	bx	lr
+
+@@ make sure foo is thumb function: bit 0 = 1 (st_value)
+@CHECK:           Symbol 0x00000004
+@CHECK-NEXT:      'st_name', 0x00000001
+@CHECK-NEXT:      'st_value', 0x00000001
+@CHECK-NEXT:      'st_size', 0x00000000
+@CHECK-NEXT:      'st_bind', 0x00000001
+@CHECK-NEXT:      'st_type', 0x00000002
diff --git a/test/MC/ARM/simple-encoding.ll b/test/MC/ARM/simple-encoding.ll
index 7b581b3..3322803 100644
--- a/test/MC/ARM/simple-encoding.ll
+++ b/test/MC/ARM/simple-encoding.ll
@@ -1,4 +1,4 @@
-;RUN: llc -mtriple=armv7-apple-darwin -show-mc-encoding -disable-cgp-branch-opts < %s | FileCheck %s
+;RUN: llc -mtriple=armv7-apple-darwin -show-mc-encoding -disable-cgp-branch-opts -join-physregs < %s | FileCheck %s
 
 
 ;FIXME: Once the ARM integrated assembler is up and going, these sorts of tests
diff --git a/test/MC/ARM/thumb.s b/test/MC/ARM/thumb.s
index 342a390..55d9789 100644
--- a/test/MC/ARM/thumb.s
+++ b/test/MC/ARM/thumb.s
@@ -12,6 +12,8 @@
 
 @ CHECK: blx	r9                   @ encoding: [0xc8,0x47]
 	blx	r9
+@ CHECK: blx	r10                     @ encoding: [0xd0,0x47]
+  blx r10
 
 @ CHECK: rev	r2, r3               @ encoding: [0x1a,0xba]
 @ CHECK: rev16	r3, r4               @ encoding: [0x63,0xba]
@@ -68,3 +70,6 @@
 
 @ CHECK: cpsie aif @ encoding: [0x67,0xb6]
         cpsie aif
+
+@ CHECK: mov  r0, pc @ encoding: [0x78,0x46]
+        mov  r0, pc
diff --git a/test/MC/ARM/thumb2.s b/test/MC/ARM/thumb2.s
index 5342b90..4e9d4e1 100644
--- a/test/MC/ARM/thumb2.s
+++ b/test/MC/ARM/thumb2.s
@@ -300,3 +300,5 @@
   ldrex  r0, [r0]
 @ CHECK: ldrexd  r0, r1, [r0] @ encoding: [0xd0,0xe8,0x7f,0x01]
   ldrexd  r0, r1, [r0]
+@ CHECK: ssat16  r0, #7, r0 @ encoding: [0x20,0xf3,0x06,0x00]
+  ssat16  r0, #7, r0
diff --git a/test/MC/ARM/xscale-attributes.ll b/test/MC/ARM/xscale-attributes.ll
new file mode 100644
index 0000000..e576278
--- /dev/null
+++ b/test/MC/ARM/xscale-attributes.ll
@@ -0,0 +1,31 @@
+; RUN: llc %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale -o - | \
+; RUN: FileCheck -check-prefix=ASM %s
+
+; RUN: llc %s -mtriple=thumbv5-linux-gnueabi -filetype=obj \
+; RUN: -mcpu=xscale -o - | elf-dump --dump-section-data | \
+; RUN: FileCheck -check-prefix=OBJ %s
+
+; FIXME: The OBJ test should be a .s to .o test and the ASM test should
+; be moved to test/CodeGen/ARM.
+
+define void @foo() nounwind {
+entry:
+  ret void
+}
+
+; ASM:           .eabi_attribute 6, 5
+; ASM-NEXT:      .eabi_attribute 8, 1
+; ASM-NEXT:      .eabi_attribute 9, 1
+
+; OBJ:           Section 0x00000004
+; OBJ-NEXT:      'sh_name', 0x0000000c
+; OBJ-NEXT:      'sh_type', 0x70000003
+; OBJ-NEXT:	   'sh_flags', 0x00000000
+; OBJ-NEXT:	   'sh_addr', 0x00000000
+; OBJ-NEXT:	   'sh_offset', 0x00000038
+; OBJ-NEXT:	   'sh_size', 0x00000020
+; OBJ-NEXT:	   'sh_link', 0x00000000
+; OBJ-NEXT:	   'sh_info', 0x00000000
+; OBJ-NEXT:	   'sh_addralign', 0x00000001
+; OBJ-NEXT:	   'sh_entsize', 0x00000000
+; OBJ-NEXT:      '_section_data', '411f0000 00616561 62690001 15000000 06050801 09011401 15011703 18011901'
diff --git a/test/MC/AsmParser/directive_seh.s b/test/MC/AsmParser/directive_seh.s
new file mode 100644
index 0000000..98fc606
--- /dev/null
+++ b/test/MC/AsmParser/directive_seh.s
@@ -0,0 +1,48 @@
+# RUN: llvm-mc -triple x86_64-pc-win32 %s | FileCheck %s
+
+# CHECK: .seh_proc func
+# CHECK: .seh_pushframe @code
+# CHECK: .seh_stackalloc 24
+# CHECK: .seh_savereg 6, 16
+# CHECK: .seh_savexmm 8, 0
+# CHECK: .seh_pushreg 3
+# CHECK: .seh_setframe 3, 0
+# CHECK: .seh_endprologue
+# CHECK: .seh_handler __C_specific_handler, @except
+# CHECK-NOT: .section{{.*}}.xdata
+# CHECK: .seh_handlerdata
+# CHECK: .text
+# CHECK: .seh_startchained
+# CHECK: .seh_endprologue
+# CHECK: .seh_endchained
+# CHECK: .seh_endproc
+
+    .text
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+func:
+    .seh_pushframe @code
+    subq $24, %rsp
+    .seh_stackalloc 24
+    movq %rsi, 16(%rsp)
+    .seh_savereg %rsi, 16
+    movups %xmm8, (%rsp)
+    .seh_savexmm %xmm8, 0
+    pushq %rbx
+    .seh_pushreg 3
+    mov %rsp, %rbx
+    .seh_setframe 3, 0
+    .seh_endprologue
+    .seh_handler __C_specific_handler, @except
+    .seh_handlerdata
+    .long 0
+    .text
+    .seh_startchained
+    .seh_endprologue
+    .seh_endchained
+    lea (%rbx), %rsp
+    pop %rbx
+    addq $24, %rsp
+    ret
+    .seh_endproc
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index 98259bd..6c79c38 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -56,3 +56,10 @@ TEST6:
 # CHECK:        .byte   35
 # CHECK:        .byte   9
 # CHECK:        .byte   10
+
+TEST7:
+        .byte 1, 2, 3, 4
+# CHECK:        .byte   1
+# CHECK-NEXT:   .byte   2
+# CHECK-NEXT:   .byte   3
+# CHECK-NEXT:   .byte   4
diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s
new file mode 100644
index 0000000..808b6eb
--- /dev/null
+++ b/test/MC/AsmParser/macro-args.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s
+
+.macro GET   var,re2g
+    movl   \var@GOTOFF(%ebx),\re2g
+.endm
+
+
+GET    is_sse, %eax
+
+// CHECK: movl	is_sse@GOTOFF(%ebx), %eax
diff --git a/test/MC/COFF/seh-section.s b/test/MC/COFF/seh-section.s
new file mode 100644
index 0000000..802cba5
--- /dev/null
+++ b/test/MC/COFF/seh-section.s
@@ -0,0 +1,37 @@
+// This test ensures that, if the section containing a function has a suffix
+// (e.g. .text$foo), its unwind info section also has a suffix (.xdata$foo).
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// XFAIL: *
+
+// CHECK:      Name                 = .xdata$foo
+// CHECK-NEXT: VirtualSize
+// CHECK-NEXT: VirtualAddress
+// CHECK-NEXT: SizeOfRawData        = 8
+// CHECK-NEXT: PointerToRawData
+// CHECK-NEXT: PointerToRelocations
+// CHECK-NEXT: PointerToLineNumbers
+// CHECK-NEXT: NumberOfRelocations  = 0
+// CHECK-NEXT: NumberOfLineNumbers  = 0
+// CHECK-NEXT: Charateristics
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_MEM_READ
+// CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT: SectionData
+// CHECK-NEXT:   01 05 02 00 05 50 04 02
+
+    .section .text$foo,"x"
+    .globl foo
+    .def foo; .scl 2; .type 32; .endef
+    .seh_proc foo
+foo:
+    subq $8, %rsp
+    .seh_stackalloc 8
+    pushq %rbp
+    .seh_pushreg %rbp
+    .seh_endprologue
+    popq %rbp
+    addq $8, %rsp
+    ret
+    .seh_endproc
+
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
new file mode 100644
index 0000000..8cafcb3
--- /dev/null
+++ b/test/MC/COFF/seh.s
@@ -0,0 +1,60 @@
+// This test checks that the SEH directives emit the correct unwind data.
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | coff-dump.py | FileCheck %s
+
+// CHECK:      Name                 = .xdata
+// CHECK-NEXT: VirtualSize
+// CHECK-NEXT: VirtualAddress
+// CHECK-NEXT: SizeOfRawData        = 52
+// CHECK-NEXT: PointerToRawData
+// CHECK-NEXT: PointerToRelocations
+// CHECK-NEXT: PointerToLineNumbers
+// CHECK-NEXT: NumberOfRelocations  = 4
+// CHECK-NEXT: NumberOfLineNumbers  = 0
+// CHECK-NEXT: Charateristics
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_MEM_READ
+// CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT: SectionData
+// CHECK-NEXT:   09 12 08 03 00 03 0F 30 - 0E 88 00 00 09 64 02 00
+// CHECK-NEXT:   04 22 00 1A 00 00 00 00 - 00 00 00 00 21 00 00 00
+// CHECK-NEXT:   00 00 00 00 1B 00 00 00 - 00 00 00 00 01 00 00 00
+// CHECK-NEXT:   00 00 00 00
+
+    .text
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+func:
+    .seh_pushframe @code
+    subq $24, %rsp
+    .seh_stackalloc 24
+    movq %rsi, 16(%rsp)
+    .seh_savereg %rsi, 16
+    movups %xmm8, (%rsp)
+    .seh_savexmm %xmm8, 0
+    pushq %rbx
+    .seh_pushreg 3
+    mov %rsp, %rbx
+    .seh_setframe 3, 0
+    .seh_endprologue
+    .seh_handler __C_specific_handler, @except
+    .seh_handlerdata
+    .long 0
+    .text
+    .seh_startchained
+    .seh_endprologue
+    .seh_endchained
+    lea (%rbx), %rsp
+    pop %rbx
+    addq $24, %rsp
+    ret
+    .seh_endproc
+
+// Test emission of small functions.
+    .globl smallFunc
+    .def smallFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+smallFunc:
+    ret
+    .seh_endproc
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index ade2952..ca072c7 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -164,6 +164,9 @@
 # CHECK: bx r12
 0x1c 0xff 0x2f 0xe1
 
+# CHECK: bxeq r5
+0x15 0xff 0x2f 0x01
+
 # CHECK:	uqadd16mi	r6, r11, r8
 0x18 0x60 0x6b 0x46
 
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index 774dbe4..895a5bb 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -262,3 +262,6 @@
 
 # CHECK:	nop.w
 0xaf 0xf3 0x00 0x80
+
+# CHECK:	bne	#24
+0x0c 0xd1
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 963a76c..f54dec0 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -3,7 +3,7 @@
 f:
 	.cfi_startproc
         nop
-	.cfi_offset %ebp, -16
+	.cfi_offset %rbp, -16
         nop
 	.cfi_endproc
 
diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s
new file mode 100644
index 0000000..a73f3a9
--- /dev/null
+++ b/test/MC/ELF/cfi-sections.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
+
+.cfi_sections .debug_frame
+
+f1:
+        .cfi_startproc
+        nop
+        .cfi_endproc
+
+f2:
+        .cfi_startproc
+        nop
+        .cfi_endproc
+
+// ELF_64:      (('sh_name', 0x00000011) # '.debug_frame'
+// ELF_64-NEXT:  ('sh_type', 0x00000001)
+// ELF_64-NEXT:  ('sh_flags', 0x00000000)
+// ELF_64-NEXT:  ('sh_addr', 0x00000000)
+// ELF_64-NEXT:  ('sh_offset', 0x00000048)
+// ELF_64-NEXT:  ('sh_size', 0x00000048)
+// ELF_64-NEXT:  ('sh_link', 0x00000000)
+// ELF_64-NEXT:  ('sh_info', 0x00000000)
+// ELF_64-NEXT:  ('sh_addralign', 0x00000008)
+// ELF_64-NEXT:  ('sh_entsize', 0x00000000)
+// ELF_64-NEXT:  ('_section_data', '14000000 ffffffff 01000178 100c0708 90010000 00000000 14000000 00000000 00000000 00000000 01000000 00000000 14000000 00000000 00000000 00000000 01000000 00000000')
+
+// ELF_32:      (('sh_name', 0x00000010) # '.debug_frame'
+// ELF_32-NEXT:  ('sh_type', 0x00000001)
+// ELF_32-NEXT:  ('sh_flags', 0x00000000)
+// ELF_32-NEXT:  ('sh_addr', 0x00000000)
+// ELF_32-NEXT:  ('sh_offset', 0x00000038)
+// ELF_32-NEXT:  ('sh_size', 0x00000034)
+// ELF_32-NEXT:  ('sh_link', 0x00000000)
+// ELF_32-NEXT:  ('sh_info', 0x00000000)
+// ELF_32-NEXT:  ('sh_addralign', 0x00000004)
+// ELF_32-NEXT:  ('sh_entsize', 0x00000000)
+// ELF_32-NEXT:  ('_section_data', '10000000 ffffffff 0100017c 080c0404 88010000 0c000000 00000000 00000000 01000000 0c000000 00000000 01000000 01000000')
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index f7b20b5..25f3450 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -153,6 +153,13 @@
 // CHECK-NEXT:  ('r_sym',
 // CHECK-NEXT:  ('r_type', 0x00000001)
 // CHECK-NEXT: ),
+// Relocation 24 (foo@GOTTPOFF(%edx)) is of type R_386_TLS_IE_32 and uses the
+// symbol
+// CHECK-NEXT: Relocation 0x00000018
+// CHECK-NEXT: (('r_offset', 0x0000008e)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000021)
+// CHECK-NEXT: ),
 
 // Section 4 is bss
 // CHECK:      # Section 0x00000004
@@ -217,6 +224,7 @@ bar2:
         movl zed@TPOFF(%eax), %eax
         movl zed@DTPOFF(%eax), %eax
         pushl $bar
+        addl foo@GOTTPOFF(%edx), %eax
 
         .section        zedsec,"awT",@progbits
 zed:
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 4df09e1..2760232 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -17,6 +17,7 @@ bar:
         pushq    $bar
         movq	foo(%rip), %rdx
         leaq    foo-bar(%r14),%r14
+        addq	$bar,%rax         # R_X86_64_32S
 
 
 // CHECK:  # Section 0x00000001
@@ -106,6 +107,12 @@ bar:
 // CHECK-NEXT:  ('r_type', 0x00000002)
 // CHECK-NEXT:  ('r_addend', 0x0000005c)
 
+// CHECK: # Relocation 0x0000000e
+// CHECK-NEXT: (('r_offset', 0x00000063)
+// CHECK-NEXT:  ('r_sym', 0x00000002)
+// CHECK-NEXT:  ('r_type', 0x0000000b)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+
 // CHECK:   # Symbol 0x00000002
 // CHECK: (('st_name', 0x00000000) # ''
 // CHECK:  ('st_bind', 0x00000000)
diff --git a/test/MC/MachO/debug_frame.s b/test/MC/MachO/debug_frame.s
new file mode 100644
index 0000000..47264ef
--- /dev/null
+++ b/test/MC/MachO/debug_frame.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple i386-apple-darwin %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+// Check that we don't produce a relocation for the CIE pointer and therefore
+// we have only one relocation in __debug_frame.
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.globl	_f
+	.align	4, 0x90
+_f:                                     ## @f
+Ltmp0:
+	.cfi_startproc
+## BB#0:                                ## %entry
+	movl	$42, %eax
+	ret
+Ltmp1:
+	.cfi_endproc
+Leh_func_end0:
+
+	.cfi_sections .debug_frame
+Ltext_end:
+
+// CHECK:       (('section_name', '__debug_frame\x00\x00\x00')
+// CHECK-NEXT:   ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:   ('address', 8)
+// CHECK-NEXT:   ('size', 36)
+// CHECK-NEXT:   ('offset', 332)
+// CHECK-NEXT:   ('alignment', 2)
+// CHECK-NEXT:   ('reloc_offset', 368)
+// CHECK-NEXT:   ('num_reloc', 1)
+// CHECK-NEXT:   ('flags', 0x2000000)
+// CHECK-NEXT:   ('reserved1', 0)
+// CHECK-NEXT:   ('reserved2', 0)
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT:   # Relocation 0
+// CHECK-NEXT:   (('word-0', 0x1c),
+// CHECK-NEXT:    ('word-1', 0x4000001)),
+// CHECK-NEXT: ])
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 4ac7efd..d2dd78d 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -19570,3 +19570,8 @@
 
 // CHECK: 	aeskeygenassist	$125, (%edx,%eax,4), %xmm2
                 aeskeygenassist $125, (%edx,%eax,4), %xmm2
+
+// CHECK:   blendvps	(%rax), %xmm1   # encoding: [0x66,0x0f,0x38,0x14,0x08]
+            blendvps (%rax), %xmm1
+// CHECK:   blendvps	%xmm2, %xmm1    # encoding: [0x66,0x0f,0x38,0x14,0xca]
+            blendvps %xmm2, %xmm1
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 28900bb..6017880 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -934,3 +934,15 @@ pshufw $90, %mm4, %mm0
 // CHECK: strl
 // CHECK: encoding: [0x0f,0x00,0xc8]
 	str %eax
+
+
+// PR9378
+// CHECK: fsubp
+// CHECK: encoding: [0xde,0xe1]
+fsubp %st,%st(1)
+
+// PR9164
+// CHECK: fsubp	%st(2)
+// CHECK: encoding: [0xde,0xe2]
+fsubp   %st, %st(2)
+
diff --git a/test/Other/X86/dg.exp b/test/Other/X86/dg.exp
new file mode 100644
index 0000000..7b7bd4e
--- /dev/null
+++ b/test/Other/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/X86/inline-asm-newline-terminator.ll
index af93cc0..af93cc0 100644
--- a/test/Other/inline-asm-newline-terminator.ll
+++ b/test/Other/X86/inline-asm-newline-terminator.ll
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
new file mode 100644
index 0000000..e0abc63
--- /dev/null
+++ b/test/TableGen/SetTheory.td
@@ -0,0 +1,167 @@
+// Test evaluation of set operations in dags.
+// RUN: tblgen -print-sets %s | FileCheck %s
+// XFAIL: vg_leak
+//
+// The -print-sets driver configures a primitive SetTheory instance that
+// understands these sets:
+
+class Set<dag d> {
+  dag Elements = d;
+}
+
+// It prints all Set instances and their ordered set interpretation.
+
+// Define some elements.
+def a;
+def b;
+def c;
+def d;
+
+// The 'add' operator evaluates and concatenates its arguments.
+def add;
+def S0a : Set<(add)>;
+def S0b : Set<(add a)>;
+def S0c : Set<(add a, b)>;
+def S0d : Set<(add b, a)>;
+def S0e : Set<(add a, a)>;
+def S0f : Set<(add a, a, b, a, c, b, d, a)>;
+def S0g : Set<(add b, a, b)>;
+// CHECK: S0a = [ ]
+// CHECK: S0b = [ a ]
+// CHECK: S0c = [ a b ]
+// CHECK: S0d = [ b a ]
+// CHECK: S0e = [ a ]
+// CHECK: S0f = [ a b c d ]
+// CHECK: S0g = [ b a ]
+
+// Defs of Set class expand into their elements.
+// Mixed sets and elements are flattened.
+def S1a : Set<(add S0a)>;
+def S1b : Set<(add S0a, S0a)>;
+def S1c : Set<(add S0d, S0f)>;
+def S1d : Set<(add d, S0d, S0f)>;
+// CHECK: S1a = [ ]
+// CHECK: S1b = [ ]
+// CHECK: S1c = [ b a c d ]
+// CHECK: S1d = [ d b a c ]
+
+// The 'sub' operator returns the first argument with the following arguments
+// removed.
+def sub;
+def S2a : Set<(sub S1a, S1c)>;
+def S2b : Set<(sub S1c, S1d)>;
+def S2c : Set<(sub S1c, b)>;
+def S2d : Set<(sub S1c, S0c)>;
+def S2e : Set<(sub S1c, S2d)>;
+// CHECK: S2a = [ ]
+// CHECK: S2b = [ ]
+// CHECK: S2c = [ a c d ]
+// CHECK: S2d = [ c d ]
+// CHECK: S2e = [ b a ]
+
+// The 'and' operator intersects two sets. The result has the same order as the
+// first argument.
+def and;
+def S3a : Set<(and S2d, S2e)>;
+def S3b : Set<(and S2d, S1d)>;
+// CHECK: S3a = [ ]
+// CHECK: S3b = [ c d ]
+
+// The 'shl' operator removes the first N elements.
+def shl;
+def S4a : Set<(shl S0f, 0)>;
+def S4b : Set<(shl S0f, 1)>;
+def S4c : Set<(shl S0f, 3)>;
+def S4d : Set<(shl S0f, 4)>;
+def S4e : Set<(shl S0f, 5)>;
+// CHECK: S4a = [ a b c d ]
+// CHECK: S4b = [ b c d ]
+// CHECK: S4c = [ d ]
+// CHECK: S4d = [ ]
+// CHECK: S4e = [ ]
+
+// The 'trunc' operator truncates after the first N elements.
+def trunc;
+def S5a : Set<(trunc S0f, 0)>;
+def S5b : Set<(trunc S0f, 1)>;
+def S5c : Set<(trunc S0f, 3)>;
+def S5d : Set<(trunc S0f, 4)>;
+def S5e : Set<(trunc S0f, 5)>;
+// CHECK: S5a = [ ]
+// CHECK: S5b = [ a ]
+// CHECK: S5c = [ a b c ]
+// CHECK: S5d = [ a b c d ]
+// CHECK: S5e = [ a b c d ]
+
+// The 'rotl' operator rotates left, but also accepts a negative shift.
+def rotl;
+def S6a : Set<(rotl S0f, 0)>;
+def S6b : Set<(rotl S0f, 1)>;
+def S6c : Set<(rotl S0f, 3)>;
+def S6d : Set<(rotl S0f, 4)>;
+def S6e : Set<(rotl S0f, 5)>;
+def S6f : Set<(rotl S0f, -1)>;
+def S6g : Set<(rotl S0f, -4)>;
+def S6h : Set<(rotl S0f, -5)>;
+// CHECK: S6a = [ a b c d ]
+// CHECK: S6b = [ b c d a ]
+// CHECK: S6c = [ d a b c ]
+// CHECK: S6d = [ a b c d ]
+// CHECK: S6e = [ b c d a ]
+// CHECK: S6f = [ d a b c ]
+// CHECK: S6g = [ a b c d ]
+// CHECK: S6h = [ d a b c ]
+
+// The 'rotr' operator rotates right, but also accepts a negative shift.
+def rotr;
+def S7a : Set<(rotr S0f, 0)>;
+def S7b : Set<(rotr S0f, 1)>;
+def S7c : Set<(rotr S0f, 3)>;
+def S7d : Set<(rotr S0f, 4)>;
+def S7e : Set<(rotr S0f, 5)>;
+def S7f : Set<(rotr S0f, -1)>;
+def S7g : Set<(rotr S0f, -4)>;
+def S7h : Set<(rotr S0f, -5)>;
+// CHECK: S7a = [ a b c d ]
+// CHECK: S7b = [ d a b c ]
+// CHECK: S7c = [ b c d a ]
+// CHECK: S7d = [ a b c d ]
+// CHECK: S7e = [ d a b c ]
+// CHECK: S7f = [ b c d a ]
+// CHECK: S7g = [ a b c d ]
+// CHECK: S7h = [ b c d a ]
+
+// The 'decimate' operator picks every N'th element.
+def decimate;
+def e0;
+def e1;
+def e2;
+def e3;
+def e4;
+def e5;
+def e6;
+def e7;
+def e8;
+def e9;
+def E : Set<(add e0, e1, e2, e3, e4, e5, e6, e7, e8, e9)>;
+def S8a : Set<(decimate E, 3)>;
+def S8b : Set<(decimate E, 9)>;
+def S8c : Set<(decimate E, 10)>;
+def S8d : Set<(decimate (rotl E, 1), 2)>;
+def S8e : Set<(add (decimate E, 2), (decimate (rotl E, 1), 2))>;
+// CHECK: S8a = [ e0 e3 e6 e9 ]
+// CHECK: S8b = [ e0 e9 ]
+// CHECK: S8c = [ e0 ]
+// CHECK: S8d = [ e1 e3 e5 e7 e9 ]
+// CHECK: S8e = [ e0 e2 e4 e6 e8 e1 e3 e5 e7 e9 ]
+
+// The 'sequence' operator finds a sequence of records from their name.
+def sequence;
+def S9a : Set<(sequence "e%u", 3, 7)>;
+def S9b : Set<(sequence "e%u", 7, 3)>;
+def S9c : Set<(sequence "e%u", 0, 0)>;
+def S9d : Set<(sequence "S%ua", 7, 9)>;
+// CHECK: S9a = [ e3 e4 e5 e6 e7 ]
+// CHECK: S9b = [ e7 e6 e5 e4 e3 ]
+// CHECK: S9c = [ e0 ]
+// CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ]
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 82d7324..3b6010a 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -7,6 +7,7 @@ declare double @sin(double)
 declare double @tan(double)
 
 declare double @sqrt(double)
+declare double @exp2(double)
 
 define double @T() {
 ; CHECK: @T
@@ -19,7 +20,11 @@ define double @T() {
   %b = fadd double %a, %C
   %D = call double @sqrt(double 4.000000e+00)
   %c = fadd double %b, %D
-  ret double %c
+
+  ; PR9315
+  %E = call double @exp2(double 4.0)
+  %d = fadd double %c, %E 
+  ret double %d
 }
 
 define i1 @test_sse_cvt() nounwind readnone {
diff --git a/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll b/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
index adfd019..858c935 100644
--- a/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
+++ b/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
@@ -2,7 +2,7 @@
 ; RUN: cat %t | grep 123
 
 ; This test tries to catch wrongful removal of return values for a specific case
-; that was break llvm-gcc builds.
+; that was breaking llvm-gcc builds.
 
 ; This function has a live return value, it is used by @alive.
 define internal i32 @test5() {
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index a61eac9..23576dad 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -236,3 +236,20 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
 }
+
+
+; The store here is not dead because the byval call reads it.
+declare void @test19f({i32}* byval align 4 %P)
+
+define void @test19({i32} * nocapture byval align 4 %arg5) nounwind ssp {
+bb:
+  %tmp7 = getelementptr inbounds {i32}* %arg5, i32 0, i32 0
+  store i32 912, i32* %tmp7
+  call void @test19f({i32}* byval align 4 %arg5)
+  ret void
+
+; CHECK: @test19(
+; CHECK: store i32 912
+; CHECK: call void @test19f
+}
+
diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index 14cb91b..a1cc008 100644
--- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -4,8 +4,11 @@
 
 define i32 @NextRootMove(i32 %wtm) {
 entry:
+        %A = alloca i32*
 	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17618, i32** %A
 ; CHECK: entry:
+; CHECK-NEXT: alloca i32
 ; CHECK-NEXT: %tmp17618 = load
 ; CHECK-NOT: load
 ; CHECK-NOT: phi
@@ -16,6 +19,7 @@ cond_true116:
 
 cond_true128:
 	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17625, i32** %A
 	br i1 false, label %bb98.backedge, label %return.loopexit
 
 bb98.backedge:
@@ -23,6 +27,7 @@ bb98.backedge:
 
 cond_true145:
 	%tmp17631 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17631, i32** %A
 	br i1 false, label %bb98.backedge, label %return.loopexit
 
 return.loopexit:
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index be69cfc..407940b 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -5,6 +5,7 @@
 
 define i8 @func_1() nounwind  {
 entry:
+  %A = alloca i8
 	br i1 false, label %ifelse, label %ifthen
 
 ifthen:		; preds = %entry
@@ -12,6 +13,7 @@ ifthen:		; preds = %entry
 
 ifelse:		; preds = %entry
 	%tmp3 = load i8* @g_3		; <i8> [#uses=0]
+        store i8 %tmp3, i8* %A
 	br label %forcond.thread
 
 forcond.thread:		; preds = %ifelse
diff --git a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
new file mode 100644
index 0000000..f24e956
--- /dev/null
+++ b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+; This test is checking that (a) this doesn't crash, and (b) we don't
+; conclude the value of %tmp17 is available in bb1.bb15_crit_edge.
+; rdar://9429882
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define i1 @rb_intern() nounwind ssp {
+; CHECK: @rb_intern
+
+bb:
+  %tmp = alloca i8*, align 8
+  store i8* null, i8** %tmp, align 8
+  store i8 undef, i8* null, align 536870912
+  br label %bb1
+
+bb1:
+  br i1 undef, label %bb3, label %bb15
+
+; CHECK: bb1:
+; CHECK: %tmp16 = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
+
+; CHECK: bb1.bb15_crit_edge:
+; CHECK: %tmp17.pre = load i8* %tmp16, align 1
+
+bb3:
+  call void @isalnum()
+  br i1 undef, label %bb10, label %bb5
+
+bb5:
+  br i1 undef, label %bb10, label %bb6
+
+bb6:
+  %tmp7 = load i8** %tmp, align 8
+  %tmp8 = load i8* %tmp7, align 1
+  %tmp9 = zext i8 %tmp8 to i64
+  br i1 undef, label %bb15, label %bb10
+
+bb10:
+  %tmp11 = load i8** %tmp, align 8
+  %tmp12 = load i8* %tmp11, align 1
+  %tmp13 = zext i8 %tmp12 to i64
+  %tmp14 = getelementptr inbounds i8* null, i64 undef
+  store i8* %tmp14, i8** %tmp, align 8
+  br label %bb1
+
+bb15:
+  %tmp16 = load i8** %tmp, align 8
+  %tmp17 = load i8* %tmp16, align 1
+  %tmp18 = icmp eq i8 %tmp17, 0
+  br label %bb19
+
+; CHECK: bb15:
+; CHECK: %tmp17 = phi i8 [ %tmp17.pre, %bb1.bb15_crit_edge ], [ %tmp8, %bb6 ]
+
+bb19:                                             ; preds = %bb15
+  ret i1 %tmp18
+}
+
+declare void @isalnum() nounwind inlinehint ssp
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 4a3aa1c..31eae25 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -151,3 +151,15 @@ dead:
 dead2:
   ret i32 %A
 }
+
+
+; PR9841
+define fastcc i8 @test5(i8* %P) nounwind {
+entry:
+  %0 = load i8* %P, align 2
+
+  %Q = getelementptr i8* %P, i32 1
+  %1 = load i8* %Q, align 1
+  ret i8 %1
+}
+
diff --git a/test/Transforms/GVN/mixed.ll b/test/Transforms/GVN/mixed.ll
deleted file mode 100644
index 6bfada2..0000000
--- a/test/Transforms/GVN/mixed.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -basicaa -gvn -S | not grep DEADLOAD
-; RUN: opt < %s -basicaa -gvn -S | not grep DEADGEP
-
-define i32 @main(i32** %p) {
-block1:
-	%z1 = load i32** %p
-	%z2 = getelementptr i32* %z1, i32 0
-	%z3 = load i32* %z2
-	%DEADLOAD = load i32** %p
-	%DEADGEP = getelementptr i32* %DEADLOAD, i32 0
-	%DEADLOAD2 = load i32* %DEADGEP
-	ret i32 %DEADLOAD2
-}
diff --git a/test/Transforms/GVN/phi-translate-partial-alias.ll b/test/Transforms/GVN/phi-translate-partial-alias.ll
new file mode 100644
index 0000000..47bec41
--- /dev/null
+++ b/test/Transforms/GVN/phi-translate-partial-alias.ll
@@ -0,0 +1,27 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+; GVN shouldn't PRE the load around the loop backedge because it's
+; not actually redundant around the loop backedge, despite appearances
+; if phi-translation is ignored.
+
+; CHECK: define void @test0(i8* %begin)
+; CHECK: loop:
+; CHECK:   %l0 = load i8* %phi
+; CHECK:   call void @bar(i8 %l0)
+; CHECK:   %l1 = load i8* %phi
+define void @test0(i8* %begin) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi i8* [ %begin, %entry ], [ %next, %loop ]
+  %l0 = load i8* %phi
+  call void @bar(i8 %l0)
+  %l1 = load i8* %phi
+  %next = getelementptr inbounds i8* %phi, i8 %l1
+  br label %loop
+}
+
+declare void @bar(i8)
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll
index 2fcfc47..a936755 100644
--- a/test/Transforms/GVN/preserve-tbaa.ll
+++ b/test/Transforms/GVN/preserve-tbaa.ll
@@ -5,9 +5,9 @@ target datalayout = "e-p:64:64:64"
 ; GVN should preserve the TBAA tag on loads when doing PRE.
 
 ; CHECK: @test
-; CHECK: %tmp33.pre = load i16* undef, align 2, !tbaa !0
+; CHECK: %tmp33.pre = load i16* %P, align 2, !tbaa !0
 ; CHECK: br label %for.body
-define void @test() nounwind {
+define void @test(i16 *%P, i16* %Q) nounwind {
 entry:
   br i1 undef, label %bb.nph, label %for.end
 
@@ -15,8 +15,10 @@ bb.nph:                                           ; preds = %entry
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %bb.nph
-  %tmp33 = load i16* undef, align 2, !tbaa !0
-  store i16 undef, i16* undef, align 2, !tbaa !0
+  %tmp33 = load i16* %P, align 2, !tbaa !0
+  store i16 %tmp33, i16* %Q
+
+  store i16 0, i16* %P, align 2, !tbaa !0
   br i1 false, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body, %entry
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 4ff5bec..28b1fc7 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -360,8 +360,11 @@ Cont:
 
 define i32 @chained_load(i32** %p) {
 block1:
+  %A = alloca i32*
+
   %z = load i32** %p
-	br i1 true, label %block2, label %block3
+  store i32* %z, i32** %A
+  br i1 true, label %block2, label %block3
 
 block2:
  %a = load i32** %p
@@ -559,14 +562,14 @@ entry:
   %add = add nsw i32 %tmp2, %conv
   ret i32 %add
 
-; CHECK: @load_load_partial_alias
-; CHECK: load i32*
-; CHECK-NOT: load
-; CHECK: lshr i32 {{.*}}, 8
-; CHECK-NOT: load
-; CHECK: trunc i32 {{.*}} to i8
-; CHECK-NOT: load
-; CHECK: ret i32
+; TEMPORARILYDISABLED: @load_load_partial_alias
+; TEMPORARILYDISABLED: load i32*
+; TEMPORARILYDISABLED-NOT: load
+; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
+; TEMPORARILYDISABLED-NOT: load
+; TEMPORARILYDISABLED: trunc i32 {{.*}} to i8
+; TEMPORARILYDISABLED-NOT: load
+; TEMPORARILYDISABLED: ret i32
 }
 
 
@@ -586,10 +589,10 @@ land.lhs.true:                                    ; preds = %entry
 
 if.end:
   ret i32 52
-; CHECK: @load_load_partial_alias_cross_block
-; CHECK: land.lhs.true:
-; CHECK-NOT: load i8
-; CHECK: ret i32 %conv6
+; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
+; TEMPORARILY_DISABLED: land.lhs.true:
+; TEMPORARILY_DISABLED-NOT: load i8
+; TEMPORARILY_DISABLED: ret i32 %conv6
 }
 
 
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index 9da5a5e..366a874 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -64,3 +64,17 @@ define void @memset_with_strange_user() ssp {
   ret void
 }
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+
+; PR9856
+@g_52 = internal global i32** null, align 8
+@g_90 = external global i32*, align 8
+
+define void @icmp_user_of_stored_once() nounwind ssp {
+entry:
+  %tmp4 = load i32*** @g_52, align 8
+  store i32** @g_90, i32*** @g_52
+  %cmp17 = icmp ne i32*** undef, @g_52
+  ret void
+}
+
diff --git a/test/Transforms/GlobalOpt/memset-null.ll b/test/Transforms/GlobalOpt/memset-null.ll
new file mode 100644
index 0000000..01534025
--- /dev/null
+++ b/test/Transforms/GlobalOpt/memset-null.ll
@@ -0,0 +1,29 @@
+; RUN: opt -globalopt %s -S -o - | FileCheck %s
+; PR10047
+
+%0 = type { i32, void ()* }
+%struct.A = type { [100 x i32] }
+
+; CHECK: @a
+@a = global %struct.A zeroinitializer, align 4
+@llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }, %0 { i32 65535, void ()* @_GLOBAL__I_b }]
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; CHECK-NOT: GLOBAL__I_a
+define internal void @_GLOBAL__I_a() nounwind {
+entry:
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.A* @a to i8*), i8 0, i64 400, i32 4, i1 false) nounwind
+  ret void
+}
+
+%struct.X = type { i8 }
+@y = global i8* null, align 8
+@x = global %struct.X zeroinitializer, align 1
+
+define internal void @_GLOBAL__I_b() nounwind {
+entry:
+  %tmp.i.i.i = load i8** @y, align 8
+  tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i32 1, i1 false) nounwind
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll
new file mode 100644
index 0000000..0367e11
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -0,0 +1,153 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; IV with constant start, preinc and postinc sign extends, with and without NSW.
+; IV rewrite only removes one sext. WidenIVs removes all three.
+define void @postincConstIV(i8* %base, i32 %limit) nounwind {
+entry:
+  br label %loop
+; CHECK: loop:
+; CHECK-NOT: sext
+; CHECK: exit:
+loop:
+  %iv = phi i32 [ %postiv, %loop ], [ 0, %entry ]
+  %ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
+  %preofs = sext i32 %iv to i64
+  %preadr = getelementptr i8* %base, i64 %preofs
+  store i8 0, i8* %preadr
+  %postiv = add i32 %iv, 1
+  %postofs = sext i32 %postiv to i64
+  %postadr = getelementptr i8* %base, i64 %postofs
+  store i8 0, i8* %postadr
+  %postivnsw = add nsw i32 %ivnsw, 1
+  %postofsnsw = sext i32 %postivnsw to i64
+  %postadrnsw = getelementptr i8* %base, i64 %postofsnsw
+  store i8 0, i8* %postadrnsw
+  %cond = icmp sgt i32 %limit, %iv
+  br i1 %cond, label %loop, label %exit
+exit:
+  br label %return
+return:
+  ret void
+}
+
+; IV with nonconstant start, preinc and postinc sign extends,
+; with and without NSW.
+; As with postincConstIV, WidenIVs removes all three sexts.
+define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind {
+entry:
+  %precond = icmp sgt i32 %limit, %init
+  br i1 %precond, label %loop, label %return
+; CHECK: loop:
+; CHECK-NOT: sext
+; CHECK: exit:
+loop:
+  %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
+  %ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
+  %preofs = sext i32 %iv to i64
+  %preadr = getelementptr i8* %base, i64 %preofs
+  store i8 0, i8* %preadr
+  %postiv = add i32 %iv, 1
+  %postofs = sext i32 %postiv to i64
+  %postadr = getelementptr i8* %base, i64 %postofs
+  store i8 0, i8* %postadr
+  %postivnsw = add nsw i32 %ivnsw, 1
+  %postofsnsw = sext i32 %postivnsw to i64
+  %postadrnsw = getelementptr i8* %base, i64 %postofsnsw
+  store i8 0, i8* %postadrnsw
+  %cond = icmp sgt i32 %limit, %postiv
+  br i1 %cond, label %loop, label %exit
+exit:
+  br label %return
+return:
+  ret void
+}
+
+; Test sign extend elimination in the inner and outer loop.
+; %outercount is straightforward to widen, besides being in an outer loop.
+; %innercount is currently blocked by lcssa, so is not widened.
+; %inneriv can be widened only after proving it has no signed-overflow
+;   based on the loop test.
+define void @nestedIV(i8* %address, i32 %limit) nounwind {
+entry:
+  %limitdec = add i32 %limit, -1
+  br label %outerloop
+
+; CHECK: outerloop:
+;
+; Eliminate %ofs1 after widening outercount.
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; IV rewriting hoists a gep into this block. We don't like that.
+; CHECK-NOT: getelementptr
+outerloop:
+  %outercount   = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
+  %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]
+
+  %outercountdec = add i32 %outercount, -1
+  %ofs1 = sext i32 %outercountdec to i64
+  %adr1 = getelementptr i8* %address, i64 %ofs1
+  store i8 0, i8* %adr1
+
+  br label %innerpreheader
+
+innerpreheader:
+  %innerprecmp = icmp sgt i32 %limitdec, %innercount
+  br i1 %innerprecmp, label %innerloop, label %outermerge
+
+; CHECK: innerloop:
+;
+; Eliminate %ofs2 after widening inneriv.
+; Eliminate %ofs3 after normalizing sext(innerpostiv)
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; FIXME: We should check that indvars does not increase the number of
+; IVs in this loop. sext elimination plus LFTR currently results in 2 final
+; IVs. Waiting to remove LFTR.
+innerloop:
+  %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
+  %innerpostiv = add i32 %inneriv, 1
+
+  %ofs2 = sext i32 %inneriv to i64
+  %adr2 = getelementptr i8* %address, i64 %ofs2
+  store i8 0, i8* %adr2
+
+  %ofs3 = sext i32 %innerpostiv to i64
+  %adr3 = getelementptr i8* %address, i64 %ofs3
+  store i8 0, i8* %adr3
+
+  %innercmp = icmp sgt i32 %limitdec, %innerpostiv
+  br i1 %innercmp, label %innerloop, label %innerexit
+
+innerexit:
+  %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
+  br label %outermerge
+
+; CHECK: outermerge:
+;
+; Eliminate %ofs4 after widening outercount
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; TODO: Eliminate %ofs5 after removing lcssa
+outermerge:
+  %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]
+
+  %ofs4 = sext i32 %outercount to i64
+  %adr4 = getelementptr i8* %address, i64 %ofs4
+  store i8 0, i8* %adr4
+
+  %ofs5 = sext i32 %innercount.merge to i64
+  %adr5 = getelementptr i8* %address, i64 %ofs5
+  store i8 0, i8* %adr5
+
+  %outerpostcount = add i32 %outercount, 1
+  %tmp47 = icmp slt i32 %outerpostcount, %limit
+  br i1 %tmp47, label %outerloop, label %return
+
+return:
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/iv-sext.ll b/test/Transforms/IndVarSimplify/iv-sext.ll
index 3e90873..6c7a627 100644
--- a/test/Transforms/IndVarSimplify/iv-sext.ll
+++ b/test/Transforms/IndVarSimplify/iv-sext.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -indvars -S > %t
-; RUN: grep {= sext} %t | count 4
-; RUN: grep {phi i64} %t | count 2
+; RUN: opt < %s -indvars -S | FileCheck %s
 
 ; Indvars should be able to promote the hiPart induction variable in the
 ; inner loop to i64.
@@ -18,6 +16,9 @@ bb.nph22:		; preds = %entry
 	%tmp3 = add i32 %bandEdgeIndex, -1		; <i32> [#uses=2]
 	br label %bb
 
+; CHECK: bb:
+; CHECK: phi i64
+; CHECK-NOT: phi i64
 bb:		; preds = %bb8, %bb.nph22
 	%distERBhi.121 = phi float [ %distERBhi.2.lcssa, %bb8 ], [ 0.000000e+00, %bb.nph22 ]		; <float> [#uses=2]
 	%distERBlo.120 = phi float [ %distERBlo.0.lcssa, %bb8 ], [ 0.000000e+00, %bb.nph22 ]		; <float> [#uses=2]
@@ -28,6 +29,7 @@ bb:		; preds = %bb8, %bb.nph22
 	%tmp4 = icmp sgt i32 %part.016, 0		; <i1> [#uses=1]
 	br i1 %tmp4, label %bb1, label %bb3.preheader
 
+; CHECK: bb1:
 bb1:		; preds = %bb
 	%tmp5 = add i32 %part.016, -1		; <i32> [#uses=1]
 	%tmp6 = sext i32 %tmp5 to i64		; <i64> [#uses=1]
@@ -86,7 +88,10 @@ bb5.preheader:		; preds = %bb3.bb5.preheader_crit_edge, %bb3.preheader
 
 bb.nph12:		; preds = %bb5.preheader
 	br label %bb4
-
+; CHECK: bb4:
+; CHECK: phi i64
+; CHECK-NOT: phi i64
+; CHECK-NOT: sext
 bb4:		; preds = %bb5, %bb.nph12
 	%distERBhi.29 = phi float [ %tmp30, %bb5 ], [ %distERBhi.0.ph, %bb.nph12 ]		; <float> [#uses=1]
 	%hiPart.08 = phi i32 [ %tmp31, %bb5 ], [ %hiPart.119, %bb.nph12 ]		; <i32> [#uses=2]
@@ -102,6 +107,7 @@ bb4:		; preds = %bb5, %bb.nph12
 	%tmp35 = fadd float %tmp34, %peakCount.27		; <float> [#uses=2]
 	br label %bb5
 
+; CHECK: bb5:
 bb5:		; preds = %bb4
 	%.not = fcmp olt float %tmp30, 2.500000e+00		; <i1> [#uses=1]
 	%tmp36 = icmp sgt i32 %tmp3, %tmp31		; <i1> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 80a77b6..00018ec 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -indvars -S > %t
-; RUN: not grep and %t
-; RUN: not grep zext %t
+; RUN: opt < %s -indvars -S | FileCheck %s
+; CHECK-NOT: and
+; CHECK-NOT: zext
 
 target datalayout = "-p:64:64:64-n:32:64"
 
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
new file mode 100644
index 0000000..c35feef
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
+; Make sure that indvars isn't inserting canonical IVs.
+; This is kinda hard to do until linear function test replacement is removed.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @sum(i32* %arr, i32 %n) nounwind {
+entry:
+  %precond = icmp slt i32 0, %n
+  br i1 %precond, label %ph, label %return
+
+ph:
+  br label %loop
+
+; CHECK: loop:
+;
+; We should only have 2 IVs.
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
+;
+; sext should be eliminated while preserving gep inboundsness.
+; CHECK-NOT: sext
+; CHECK: getelementptr inbounds
+loop:
+  %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
+  %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ]
+  %ofs = sext i32 %i.02 to i64
+  %adr = getelementptr inbounds i32* %arr, i64 %ofs
+  %val = load i32* %adr
+  %sinc = add nsw i32 %s.01, %val
+  %iinc = add nsw i32 %i.02, 1
+  %cond = icmp slt i32 %iinc, %n
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  %s.lcssa = phi i32 [ %sinc, %loop ]
+  br label %return
+
+return:
+  %s.0.lcssa = phi i32 [ %s.lcssa, %exit ], [ 0, %entry ]
+  ret i32 %s.0.lcssa
+}
+
+define i64 @suml(i32* %arr, i32 %n) nounwind {
+entry:
+  %precond = icmp slt i32 0, %n
+  br i1 %precond, label %ph, label %return
+
+ph:
+  br label %loop
+
+; CHECK: loop:
+;
+; We should only have 2 IVs.
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
+;
+; %ofs sext should be eliminated while preserving gep inboundsness.
+; CHECK-NOT: sext
+; CHECK: getelementptr inbounds
+; %vall sext should obviously not be eliminated
+; CHECK: sext
+loop:
+  %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
+  %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ]
+  %ofs = sext i32 %i.02 to i64
+  %adr = getelementptr inbounds i32* %arr, i64 %ofs
+  %val = load i32* %adr
+  %vall = sext i32 %val to i64
+  %sinc = add nsw i64 %s.01, %vall
+  %iinc = add nsw i32 %i.02, 1
+  %cond = icmp slt i32 %iinc, %n
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  %s.lcssa = phi i64 [ %sinc, %loop ]
+  br label %return
+
+return:
+  %s.0.lcssa = phi i64 [ %s.lcssa, %exit ], [ 0, %entry ]
+  ret i64 %s.0.lcssa
+}
+
+define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind {
+  %precond = icmp ne i32* %first, %last
+  br i1 %precond, label %ph, label %return
+
+; CHECK: ph:
+; It's not indvars' job to perform LICM on %ofs
+; CHECK-NOT: sext
+ph:
+  br label %loop
+
+; CHECK: loop:
+;
+; Preserve exactly one pointer type IV.
+; CHECK: phi i32*
+; CHECK-NOT: phi
+;
+; Don't create any extra adds.
+; CHECK-NOT: add
+;
+; Preserve gep inboundsness, and don't factor it.
+; CHECK: getelementptr inbounds i32* %ptriv, i32 1
+; CHECK-NOT: add
+loop:
+  %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ]
+  %ofs = sext i32 %idx to i64
+  %adr = getelementptr inbounds i32* %ptriv, i64 %ofs
+  store i32 3, i32* %adr
+  %ptrpost = getelementptr inbounds i32* %ptriv, i32 1
+  %cond = icmp ne i32* %ptrpost, %last
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
index 34d432b..52c9e5c 100644
--- a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -1,4 +1,8 @@
-; RUN: opt < %s -indvars
+; RUN: opt < %s -indvars -disable-output -stats -info-output-file - | FileCheck %s
+; Check that IndVarSimplify is not creating unnecessary canonical IVs
+; that will never be used.
+; CHECK-NOT: indvars
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 @ue = external global i64
diff --git a/test/Transforms/Inline/array_merge.ll b/test/Transforms/Inline/array_merge.ll
index 0d176b8..b2eafeb 100644
--- a/test/Transforms/Inline/array_merge.ll
+++ b/test/Transforms/Inline/array_merge.ll
@@ -19,7 +19,7 @@ entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: %A.i = alloca
 ; CHECK-NEXT: %B.i = alloca
-; CHECK-NEXT: call void
+; CHECK-NOT: alloca
   call void @foo() nounwind
   call void @foo() nounwind
   ret void
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
new file mode 100644
index 0000000..2a1b883
--- /dev/null
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -0,0 +1,336 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Test that the inliner correctly handles inlining into invoke sites
+; by appending selectors and forwarding _Unwind_Resume directly to the
+; enclosing landing pad.
+
+;; Test 0 - basic functionality.
+
+%struct.A = type { i8 }
+
+@_ZTIi = external constant i8*
+
+declare void @_ZN1AC1Ev(%struct.A*)
+
+declare void @_ZN1AD1Ev(%struct.A*)
+
+declare void @use(i32) nounwind
+
+declare void @opaque()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
+
+declare void @llvm.eh.resume(i8*, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+define internal void @test0_in() alwaysinline uwtable ssp {
+entry:
+  %a = alloca %struct.A, align 1
+  %b = alloca %struct.A, align 1
+  call void @_ZN1AC1Ev(%struct.A* %a)
+  invoke void @_ZN1AC1Ev(%struct.A* %b)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  invoke void @_ZN1AD1Ev(%struct.A* %b)
+          to label %invoke.cont1 unwind label %lpad
+
+invoke.cont1:
+  call void @_ZN1AD1Ev(%struct.A* %a)
+  ret void
+
+lpad:
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) nounwind
+  invoke void @_ZN1AD1Ev(%struct.A* %a)
+          to label %invoke.cont2 unwind label %terminate.lpad
+
+invoke.cont2:
+  call void @llvm.eh.resume(i8* %exn, i32 %eh.selector) noreturn
+  unreachable
+
+terminate.lpad:
+  %exn3 = call i8* @llvm.eh.exception() nounwind
+  %eh.selector4 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+define void @test0_out() uwtable ssp {
+entry:
+  invoke void @test0_in()
+          to label %ret unwind label %lpad
+
+ret:
+  ret void
+
+lpad:                                             ; preds = %entry
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  %1 = icmp eq i32 %eh.selector, %0
+  br i1 %1, label %catch, label %eh.resume
+
+catch:
+  %ignored = call i8* @__cxa_begin_catch(i8* %exn) nounwind
+  call void @__cxa_end_catch() nounwind
+  br label %ret
+
+eh.resume:
+  call void @llvm.eh.resume(i8* %exn, i32 %eh.selector) noreturn
+  unreachable
+}
+
+; CHECK:    define void @test0_out()
+; CHECK:      [[A:%.*]] = alloca %struct.A,
+; CHECK:      [[B:%.*]] = alloca %struct.A,
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[A]])
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[B]])
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[B]])
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[A]])
+; CHECK:      call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* {{%.*}}, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0, i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]])
+; CHECK-NEXT:   to label %[[LBL:[^\s]+]] unwind
+; CHECK: [[LBL]]:
+; CHECK-NEXT: br label %[[LPAD:[^\s]+]]
+; CHECK:      ret void
+; CHECK:      call i8* @llvm.eh.exception()
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* {{%.*}}, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: br label %[[LPAD]]
+; CHECK: [[LPAD]]:
+; CHECK-NEXT: phi i8* [
+; CHECK-NEXT: phi i32 [
+; CHECK-NEXT: call i32 @llvm.eh.typeid.for(
+
+
+;; Test 1 - Correctly handle phis in outer landing pads.
+
+define void @test1_out() uwtable ssp {
+entry:
+  invoke void @test0_in()
+          to label %cont unwind label %lpad
+
+cont:
+  invoke void @test0_in()
+          to label %ret unwind label %lpad
+
+ret:
+  ret void
+
+lpad:
+  %x = phi i32 [ 0, %entry ], [ 1, %cont ]
+  %y = phi i32 [ 1, %entry ], [ 4, %cont ]
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  %1 = icmp eq i32 %eh.selector, %0
+  br i1 %1, label %catch, label %eh.resume
+
+catch:
+  %ignored = call i8* @__cxa_begin_catch(i8* %exn) nounwind
+  call void @use(i32 %x)
+  call void @use(i32 %y)
+  call void @__cxa_end_catch() nounwind
+  br label %ret
+
+eh.resume:
+  call void @llvm.eh.resume(i8* %exn, i32 %eh.selector) noreturn
+  unreachable
+}
+
+; CHECK:    define void @test1_out()
+; CHECK:      [[A2:%.*]] = alloca %struct.A,
+; CHECK:      [[B2:%.*]] = alloca %struct.A,
+; CHECK:      [[A1:%.*]] = alloca %struct.A,
+; CHECK:      [[B1:%.*]] = alloca %struct.A,
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[A1]])
+; CHECK-NEXT:   unwind label %[[LPAD:[^\s]+]]
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[B1]])
+; CHECK-NEXT:   unwind label %[[LPAD1:[^\s]+]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[B1]])
+; CHECK-NEXT:   unwind label %[[LPAD1]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
+; CHECK-NEXT:   unwind label %[[LPAD]]
+
+; Inner landing pad from first inlining.
+; CHECK:    [[LPAD1]]:
+; CHECK-NEXT: [[EXN1:%.*]] = call i8* @llvm.eh.exception()
+; CHECK-NEXT: [[SEL1:%.*]] = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* [[EXN1]], i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0, i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
+; CHECK-NEXT:   to label %[[RESUME1:[^\s]+]] unwind
+; CHECK: [[RESUME1]]:
+; CHECK-NEXT: br label %[[LPAD_JOIN1:[^\s]+]]
+
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[A2]])
+; CHECK-NEXT:   unwind label %[[LPAD]]
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[B2]])
+; CHECK-NEXT:   unwind label %[[LPAD2:[^\s]+]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[B2]])
+; CHECK-NEXT:   unwind label %[[LPAD2]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
+; CHECK-NEXT:   unwind label %[[LPAD]]
+
+; Inner landing pad from second inlining.
+; CHECK:    [[LPAD2]]:
+; CHECK-NEXT: [[EXN2:%.*]] = call i8* @llvm.eh.exception()
+; CHECK-NEXT: [[SEL2:%.*]] = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* [[EXN2]], i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0, i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
+; CHECK-NEXT:   to label %[[RESUME2:[^\s]+]] unwind
+; CHECK: [[RESUME2]]:
+; CHECK-NEXT: br label %[[LPAD_JOIN2:[^\s]+]]
+
+; CHECK:      ret void
+
+; CHECK:    [[LPAD]]:
+; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, %entry ], [ 0, {{%.*}} ], [ 1, %cont ], [ 1, {{%.*}} ]
+; CHECK-NEXT: [[Y:%.*]] = phi i32 [ 1, %entry ], [ 1, {{%.*}} ], [ 4, %cont ], [ 4, {{%.*}} ]
+; CHECK-NEXT: [[EXN:%.*]] = call i8* @llvm.eh.exception()
+; CHECK-NEXT: [[SEL:%.*]] = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* [[EXN]], i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: br label %[[LPAD_JOIN2]]
+
+; CHECK: [[LPAD_JOIN2]]:
+; CHECK-NEXT: [[XJ2:%.*]] = phi i32 [ [[X]], %[[LPAD]] ], [ 1, %[[RESUME2]] ]
+; CHECK-NEXT: [[YJ2:%.*]] = phi i32 [ [[Y]], %[[LPAD]] ], [ 4, %[[RESUME2]] ]
+; CHECK-NEXT: [[EXNJ2:%.*]] = phi i8* [ [[EXN]], %[[LPAD]] ], [ [[EXN2]], %[[RESUME2]] ]
+; CHECK-NEXT: [[SELJ2:%.*]] = phi i32 [ [[SEL]], %[[LPAD]] ], [ [[SEL2]], %[[RESUME2]] ]
+; CHECK-NEXT: br label %[[LPAD_JOIN1]]
+
+; CHECK: [[LPAD_JOIN1]]:
+; CHECK-NEXT: [[XJ1:%.*]] = phi i32 [ [[XJ2]], %[[LPAD_JOIN2]] ], [ 0, %[[RESUME1]] ]
+; CHECK-NEXT: [[YJ1:%.*]] = phi i32 [ [[YJ2]], %[[LPAD_JOIN2]] ], [ 1, %[[RESUME1]] ]
+; CHECK-NEXT: [[EXNJ1:%.*]] = phi i8* [ [[EXNJ2]], %[[LPAD_JOIN2]] ], [ [[EXN1]], %[[RESUME1]] ]
+; CHECK-NEXT: [[SELJ1:%.*]] = phi i32 [ [[SELJ2]], %[[LPAD_JOIN2]] ], [ [[SEL1]], %[[RESUME1]] ]
+; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.eh.typeid.for(
+; CHECK-NEXT: icmp eq i32 [[SELJ1]], [[T]]
+
+; CHECK:      call void @use(i32 [[XJ1]])
+; CHECK:      call void @use(i32 [[YJ1]])
+
+; CHECK:      call void @llvm.eh.resume(i8* [[EXNJ1]], i32 [[SELJ1]])
+
+
+;; Test 2 - Don't make invalid IR for inlines into landing pads without eh.exception calls
+define void @test2_out() uwtable ssp {
+entry:
+  invoke void @test0_in()
+          to label %ret unwind label %lpad
+
+ret:
+  ret void
+
+lpad:
+  call void @_ZSt9terminatev()
+  unreachable
+}
+
+; CHECK: define void @test2_out()
+; CHECK:      [[A:%.*]] = alloca %struct.A,
+; CHECK:      [[B:%.*]] = alloca %struct.A,
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[A]])
+; CHECK-NEXT:   unwind label %[[LPAD:[^\s]+]]
+; CHECK:      invoke void @_ZN1AC1Ev(%struct.A* [[B]])
+; CHECK-NEXT:   unwind label %[[LPAD2:[^\s]+]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[B]])
+; CHECK-NEXT:   unwind label %[[LPAD2]]
+; CHECK:      invoke void @_ZN1AD1Ev(%struct.A* [[A]])
+; CHECK-NEXT:   unwind label %[[LPAD]]
+
+
+;; Test 3 - Deal correctly with split unwind edges.
+define void @test3_out() uwtable ssp {
+entry:
+  invoke void @test0_in()
+          to label %ret unwind label %lpad
+
+ret:
+  ret void
+
+lpad:
+  br label %lpad.cont
+
+lpad.cont:
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  call void @_ZSt9terminatev()
+  unreachable
+}
+
+; CHECK: define void @test3_out()
+; CHECK:      call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* {{%.*}}, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0, i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: invoke void @_ZN1AD1Ev(
+; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
+; CHECK:    [[L]]:
+; CHECK-NEXT: br label %[[JOIN:[^\s]+]]
+; CHECK:    [[JOIN]]:
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: br label %lpad.cont
+; CHECK:    lpad.cont:
+; CHECK-NEXT: call void @_ZSt9terminatev()
+
+
+;; Test 4 - Split unwind edges with a dominance problem
+define void @test4_out() uwtable ssp {
+entry:
+  invoke void @test0_in()
+          to label %cont unwind label %lpad.crit
+
+cont:
+  invoke void @opaque()
+          to label %ret unwind label %lpad
+
+ret:
+  ret void
+
+lpad.crit:
+  call void @opaque() nounwind
+  br label %lpad
+
+lpad:
+  %phi = phi i32 [ 0, %lpad.crit ], [ 1, %cont ]
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  call void @use(i32 %phi)
+  call void @_ZSt9terminatev()
+  unreachable
+}
+
+; CHECK: define void @test4_out()
+; CHECK:      call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* {{%.*}}, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0, i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: invoke void @_ZN1AD1Ev(
+; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
+; CHECK:    [[L]]:
+; CHECK-NEXT: br label %[[JOIN:[^\s]+]]
+; CHECK:      invoke void @opaque()
+; CHECK-NEXT:                  unwind label %lpad
+; CHECK:    lpad.crit:
+; CHECK-NEXT: call i8* @llvm.eh.exception()
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: br label %[[JOIN]]
+; CHECK:    [[JOIN]]:
+; CHECK-NEXT: phi i8*
+; CHECK-NEXT: phi i32
+; CHECK-NEXT: call void @opaque() nounwind
+; CHECK-NEXT: br label %[[FIX:[^\s]+]]
+; CHECK:    lpad:
+; CHECK-NEXT: [[T0:%.*]] = phi i32 [ 1, %cont ]
+; CHECK-NEXT: call i8* @llvm.eh.exception() nounwind
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*))
+; CHECK-NEXT: br label %[[FIX]]
+; CHECK:    [[FIX]]:
+; CHECK-NEXT: [[T1:%.*]] = phi i32 [ [[T0]], %lpad ], [ 0, %[[JOIN]] ]
+; CHECK-NEXT: phi i8*
+; CHECK-NEXT: phi i32
+; CHECK-NEXT: call void @use(i32 [[T1]])
+; CHECK-NEXT: call void @_ZSt9terminatev()
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
new file mode 100644
index 0000000..a95c836
--- /dev/null
+++ b/test/Transforms/Inline/lifetime.ll
@@ -0,0 +1,78 @@
+; RUN: opt -inline %s -S -o - | FileCheck %s
+
+declare void @llvm.lifetime.start(i64, i8*)
+declare void @llvm.lifetime.end(i64, i8*)
+
+define void @helper_both_markers() {
+  %a = alloca i8
+  call void @llvm.lifetime.start(i64 1, i8* %a)
+  call void @llvm.lifetime.end(i64 1, i8* %a)
+  ret void
+}
+
+define void @test_both_markers() {
+; CHECK: @test_both_markers
+; CHECK: llvm.lifetime.start(i64 1
+; CHECK-NEXT: llvm.lifetime.end(i64 1
+  call void @helper_both_markers()
+; CHECK-NEXT: llvm.lifetime.start(i64 1
+; CHECK-NEXT: llvm.lifetime.end(i64 1
+  call void @helper_both_markers()
+; CHECK-NEXT: ret void
+  ret void
+}
+
+;; Without this, the inliner will simplify out @test_no_marker before adding
+;; any lifetime markers.
+declare void @use(i8* %a)
+
+define void @helper_no_markers() {
+  %a = alloca i8
+  call void @use(i8* %a)
+  ret void
+}
+
+;; We can't use CHECK-NEXT because there's an extra call void @use in between.
+;; Instead, we use CHECK-NOT to verify that there are no other lifetime calls.
+define void @test_no_marker() {
+; CHECK: @test_no_marker
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 -1
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 -1
+  call void @helper_no_markers()
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 -1
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 -1
+  call void @helper_no_markers()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+  ret void
+}
+
+define void @helper_two_casts() {
+  %a = alloca i32
+  %b = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start(i64 4, i8* %b)
+  %c = bitcast i32* %a to i8*
+  call void @llvm.lifetime.end(i64 4, i8* %c)
+  ret void
+}
+
+define void @test_two_casts() {
+; CHECK: @test_two_casts
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 4
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 4
+  call void @helper_two_casts()
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 4
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 4
+  call void @helper_two_casts()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll b/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
new file mode 100644
index 0000000..02b64e3
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine
+; PR9579
+
+define <2 x i16> @entry(<2 x i16> %a) nounwind {
+entry:
+  %a.addr = alloca <2 x i16>, align 4
+  %.compoundliteral = alloca <2 x i16>, align 4
+  store <2 x i16> %a, <2 x i16>* %a.addr, align 4
+  %tmp = load <2 x i16>* %a.addr, align 4
+  store <2 x i16> zeroinitializer, <2 x i16>* %.compoundliteral
+  %tmp1 = load <2 x i16>* %.compoundliteral
+  %cmp = icmp uge <2 x i16> %tmp, %tmp1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  ret <2 x i16> %sext
+}
diff --git a/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll b/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll
new file mode 100644
index 0000000..fba7239
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; rdar://problem/9267970
+; ideally this test will run on a 32-bit host
+; must not discard GEPs that might overflow at runtime (aren't inbounds)
+
+define i32 @main(i32 %argc) {
+entry:
+    %tmp1 = add i32 %argc, -2
+    %tmp2 = add i32 %argc, 1879048192
+    %p = alloca i8
+; CHECK: getelementptr
+    %p1 = getelementptr i8* %p, i32 %tmp1
+; CHECK: getelementptr
+    %p2 = getelementptr i8* %p, i32 %tmp2
+    %cmp = icmp ult i8* %p1, %p2
+    br i1 %cmp, label %bbtrue, label %bbfalse
+bbtrue:          ; preds = %entry
+    ret i32 -1
+bbfalse:         ; preds = %entry
+    ret i32 0
+}
diff --git a/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll b/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
new file mode 100644
index 0000000..b096d1f
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
@@ -0,0 +1,57 @@
+; ModuleID = 'test1.c'
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target triple = "x86_64-apple-macosx10.6.6"
+
+define zeroext i16 @foo1(i32 %on_off) nounwind uwtable ssp {
+entry:
+  %on_off.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  store i32 %on_off, i32* %on_off.addr, align 4
+  %tmp = load i32* %on_off.addr, align 4
+  %sub = sub i32 1, %tmp
+; CHECK-NOT: mul i32
+  %mul = mul i32 %sub, -2
+; CHECK: shl
+; CHECK-NEXT: add
+  store i32 %mul, i32* %a, align 4
+  %tmp1 = load i32* %a, align 4
+  %conv = trunc i32 %tmp1 to i16
+  ret i16 %conv
+}
+
+define zeroext i16 @foo2(i32 %on_off, i32 %q) nounwind uwtable ssp {
+entry:
+  %on_off.addr = alloca i32, align 4
+  %q.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  store i32 %on_off, i32* %on_off.addr, align 4
+  store i32 %q, i32* %q.addr, align 4
+  %tmp = load i32* %q.addr, align 4
+  %tmp1 = load i32* %on_off.addr, align 4
+  %sub = sub i32 %tmp, %tmp1
+; CHECK-NOT: mul i32
+  %mul = mul i32 %sub, -4
+; CHECK: sub i32
+; CHECK-NEXT: shl
+  store i32 %mul, i32* %a, align 4
+  %tmp2 = load i32* %a, align 4
+  %conv = trunc i32 %tmp2 to i16
+  ret i16 %conv
+}
+
+define zeroext i16 @foo3(i32 %on_off) nounwind uwtable ssp {
+entry:
+  %on_off.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  store i32 %on_off, i32* %on_off.addr, align 4
+  %tmp = load i32* %on_off.addr, align 4
+  %sub = sub i32 7, %tmp
+; CHECK-NOT: mul i32
+  %mul = mul i32 %sub, -4
+; CHECK: shl
+; CHECK-NEXT: add
+  store i32 %mul, i32* %a, align 4
+  %tmp1 = load i32* %a, align 4
+  %conv = trunc i32 %tmp1 to i16
+  ret i16 %conv
+}
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index 2ef8dc0..d084873 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -53,8 +53,8 @@ define i8 @test4a() {
 define i32 @test4() {
         %X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( )            ; <i32> [#uses=1]
         ret i32 %X
-; CHECK: %X1 = call i8 @test4a()
-; CHECK: %tmp = zext i8 %X1 to i32
+; CHECK: %X = call i8 @test4a()
+; CHECK: %tmp = zext i8 %X to i32
 ; CHECK: ret i32 %tmp
 }
 
@@ -77,8 +77,8 @@ declare i32 @test6a(i32)
 define i32 @test6() {
         %X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( )
         ret i32 %X
-; CHECK: %X1 = call i32 @test6a(i32 0)
-; CHECK: ret i32 %X1
+; CHECK: %X = call i32 @test6a(i32 0)
+; CHECK: ret i32 %X
 }
 
 
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 2e24f19..8a0897b 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -118,3 +118,17 @@ define i32 @test14(i8 %x) nounwind {
 ; CHECK: @test14
 ; CHECK-NEXT: ret i32 0
 }
+
+; PR9814
+define i32 @test15(i32 %a, i32 %b) nounwind {
+  %shl = shl i32 1, %b
+  %div = lshr i32 %shl, 2
+  %div2 = udiv i32 %a, %div
+  ret i32 %div2
+; CHECK: @test15
+; CHECK-NEXT: add i32 %b, -2
+; CHECK-NEXT: lshr i32 %a, 
+; CHECK-NEXT: ret i32
+}
+
+
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 58f8b5d..14741e3 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -96,6 +96,22 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
  ret i1 %Z
 }
 
+; PR9998
+; Make sure we don't transform the ashr here into an sdiv
+; CHECK: @pr9998
+; CHECK: = and i32 %V, 1
+; CHECK: %Z = icmp ne
+; CHECK: ret i1 %Z
+define i1 @pr9998(i32 %V) nounwind {
+entry:
+  %W = shl i32 %V, 31
+  %X = ashr exact i32 %W, 31
+  %Y = sext i32 %X to i64
+  %Z = icmp ugt i64 %Y, 7297771788697658747
+  ret i1 %Z
+}
+
+
 ; CHECK: @udiv_icmp1
 ; CHECK: icmp ne i64 %X, 0
 define i1 @udiv_icmp1(i64 %X) nounwind {
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 099540a..1237ade 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -510,3 +510,14 @@ define i1 @test52(i32 %x1) nounwind {
   ret i1 %A
 }
 
+; PR9838
+; CHECK: @test53
+; CHECK-NEXT: ashr exact
+; CHECK-NEXT: ashr
+; CHECK-NEXT: icmp
+define i1 @test53(i32 %a, i32 %b) nounwind {
+ %x = ashr exact i32 %a, 30
+ %y = ashr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 332cd46..107f313 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -30,9 +30,9 @@ define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) {
 ; CHECK: @uaddtest2
 ; CHECK-NEXT: %and.A = and i8 %A, 127
 ; CHECK-NEXT: %and.B = and i8 %B, 127
-; CHECK-NEXT: %1 = add nuw i8 %and.A, %and.B
+; CHECK-NEXT: %x = add nuw i8 %and.A, %and.B
 ; CHECK-NEXT: store i1 false, i1* %overflowPtr
-; CHECK-NEXT: ret i8 %1
+; CHECK-NEXT: ret i8 %x
 }
 
 define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) {
@@ -46,9 +46,9 @@ define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) {
 ; CHECK: @uaddtest3
 ; CHECK-NEXT: %or.A = or i8 %A, -128
 ; CHECK-NEXT: %or.B = or i8 %B, -128
-; CHECK-NEXT: %1 = add i8 %or.A, %or.B
+; CHECK-NEXT: %x = add i8 %or.A, %or.B
 ; CHECK-NEXT: store i1 true, i1* %overflowPtr
-; CHECK-NEXT: ret i8 %1
+; CHECK-NEXT: ret i8 %x
 }
 
 define i8 @uaddtest4(i8 %A, i1* %overflowPtr) {
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 94a5732..c0bb28d 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -332,8 +332,8 @@ define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
   %F = or i64 %D, %E
   ret i64 %F
 ; CHECK: @test31
-; CHECK-NEXT: %E1 = and i64 %A, 4294908984
-; CHECK-NEXT: %F = or i64 %E1, 32962
+; CHECK-NEXT: %E = and i64 %A, 4294908984
+; CHECK-NEXT: %F = or i64 %E, 32962
 ; CHECK-NEXT: ret i64 %F
 }
 
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 3925907..4ca9bd2 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -749,3 +749,53 @@ define i1 @test55(i1 %X, i32 %Y, i32 %Z) {
 ; CHECK: icmp eq
 ; CHECK: ret i1
 }
+
+define i32 @test56(i16 %x) nounwind {
+  %tobool = icmp eq i16 %x, 0
+  %conv = zext i16 %x to i32
+  %cond = select i1 %tobool, i32 0, i32 %conv
+  ret i32 %cond
+; CHECK: @test56
+; CHECK-NEXT: zext
+; CHECK-NEXT: ret
+}
+
+define i32 @test57(i32 %x, i32 %y) nounwind {
+  %and = and i32 %x, %y
+  %tobool = icmp eq i32 %x, 0
+  %.and = select i1 %tobool, i32 0, i32 %and
+  ret i32 %.and
+; CHECK: @test57
+; CHECK-NEXT: and i32 %x, %y
+; CHECK-NEXT: ret
+}
+
+define i32 @test58(i16 %x) nounwind {
+  %tobool = icmp ne i16 %x, 1
+  %conv = zext i16 %x to i32
+  %cond = select i1 %tobool, i32 %conv, i32 1
+  ret i32 %cond
+; CHECK: @test58
+; CHECK-NEXT: zext
+; CHECK-NEXT: ret
+}
+
+define i32 @test59(i32 %x, i32 %y) nounwind {
+  %and = and i32 %x, %y
+  %tobool = icmp ne i32 %x, %y
+  %.and = select i1 %tobool, i32 %and, i32 %y
+  ret i32 %.and
+; CHECK: @test59
+; CHECK-NEXT: and i32 %x, %y
+; CHECK-NEXT: ret
+}
+
+define i1 @test60(i32 %x, i1* %y) nounwind {
+  %cmp = icmp eq i32 %x, 0
+  %load = load i1* %y, align 1
+  %cmp1 = icmp slt i32 %x, 1
+  %sel = select i1 %cmp, i1 %load, i1 %cmp1
+  ret i1 %sel
+; CHECK: @test60
+; CHECK: select
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index bded68a..d9ac9cb 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -506,3 +506,24 @@ define i32 @test41(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT: shl i32 8, %b
 ; CHECK-NEXT: ret i32
 }
+
+define i32 @test42(i32 %a, i32 %b) nounwind {
+  %div = lshr i32 4096, %b    ; must be exact otherwise we'd divide by zero
+  %div2 = udiv i32 %a, %div
+  ret i32 %div2
+; CHECK: @test42
+; CHECK-NEXT: lshr exact i32 4096, %b
+}
+
+define i32 @test43(i32 %a, i32 %b) nounwind {
+  %div = shl i32 4096, %b    ; must be exact otherwise we'd divide by zero
+  %div2 = udiv i32 %a, %div
+  ret i32 %div2
+; CHECK: @test43
+; CHECK-NEXT: add i32 %b, 12
+; CHECK-NEXT: lshr
+; CHECK-NEXT: ret
+}
+
+
+
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 9f308aa..e0188fe 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -136,3 +136,19 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
+
+; <rdar://problem/6945110>
+define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
+entry:
+	%tmp = load <4 x i16>* %src
+	%tmp1 = load <8 x i16>* %foo
+; CHECK: %tmp2 = shufflevector
+	%tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; pmovzxwd ignores the upper 64-bits of its input; -instcombine should remove this shuffle:
+; CHECK-NOT: shufflevector
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: pmovzxwd
+	%0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+	ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/test/Transforms/InstCombine/x86-crc32-demanded.ll b/test/Transforms/InstCombine/x86-crc32-demanded.ll
new file mode 100644
index 0000000..878b97d
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-crc32-demanded.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; crc32 with 64-bit destination zeros high 32-bit.
+; rdar://9467055
+
+define i64 @test() nounwind {
+entry:
+; CHECK: test
+; CHECK: tail call i64 @llvm.x86.sse42.crc32.64.64
+; CHECK-NOT: and
+; CHECK: ret
+  %0 = tail call i64 @llvm.x86.sse42.crc32.64.64(i64 0, i64 4) nounwind
+  %1 = and i64 %0, 4294967295
+  ret i64 %1
+}
+
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/test/Transforms/InstSimplify/maxmin.ll b/test/Transforms/InstSimplify/maxmin.ll
new file mode 100644
index 0000000..e921214
--- /dev/null
+++ b/test/Transforms/InstSimplify/maxmin.ll
@@ -0,0 +1,269 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i1 @max1(i32 %x, i32 %y) {
+; CHECK: @max1
+  %c = icmp sgt i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp slt i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @max2(i32 %x, i32 %y) {
+; CHECK: @max2
+  %c = icmp sge i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp sge i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @max3(i32 %x, i32 %y) {
+; CHECK: @max3
+  %c = icmp ugt i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp ult i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @max4(i32 %x, i32 %y) {
+; CHECK: @max4
+  %c = icmp uge i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp uge i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @max5(i32 %x, i32 %y) {
+; CHECK: @max5
+  %c = icmp sgt i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp sgt i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @max6(i32 %x, i32 %y) {
+; CHECK: @max6
+  %c = icmp sge i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp sle i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @max7(i32 %x, i32 %y) {
+; CHECK: @max7
+  %c = icmp ugt i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp ugt i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @max8(i32 %x, i32 %y) {
+; CHECK: @max8
+  %c = icmp uge i32 %x, %y
+  %m = select i1 %c, i32 %x, i32 %y
+  %r = icmp ule i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @min1(i32 %x, i32 %y) {
+; CHECK: @min1
+  %c = icmp sgt i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp sgt i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @min2(i32 %x, i32 %y) {
+; CHECK: @min2
+  %c = icmp sge i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp sle i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @min3(i32 %x, i32 %y) {
+; CHECK: @min3
+  %c = icmp ugt i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp ugt i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @min4(i32 %x, i32 %y) {
+; CHECK: @min4
+  %c = icmp uge i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp ule i32 %m, %x
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @min5(i32 %x, i32 %y) {
+; CHECK: @min5
+  %c = icmp sgt i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp slt i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @min6(i32 %x, i32 %y) {
+; CHECK: @min6
+  %c = icmp sge i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp sge i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @min7(i32 %x, i32 %y) {
+; CHECK: @min7
+  %c = icmp ugt i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp ult i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @min8(i32 %x, i32 %y) {
+; CHECK: @min8
+  %c = icmp uge i32 %x, %y
+  %m = select i1 %c, i32 %y, i32 %x
+  %r = icmp uge i32 %x, %m
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @maxmin1(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin1
+  %c1 = icmp sge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp sge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp sge i32 %max, %min
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @maxmin2(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin2
+  %c1 = icmp sge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp sge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp sgt i32 %min, %max
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @maxmin3(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin3
+  %c1 = icmp sge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp sge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp sle i32 %min, %max
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @maxmin4(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin4
+  %c1 = icmp sge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp sge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp slt i32 %max, %min
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @maxmin5(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin5
+  %c1 = icmp uge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp uge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp uge i32 %max, %min
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @maxmin6(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin6
+  %c1 = icmp uge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp uge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp ugt i32 %min, %max
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @maxmin7(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin7
+  %c1 = icmp uge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp uge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp ule i32 %min, %max
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @maxmin8(i32 %x, i32 %y, i32 %z) {
+; CHECK: @maxmin8
+  %c1 = icmp uge i32 %x, %y
+  %max = select i1 %c1, i32 %x, i32 %y
+  %c2 = icmp uge i32 %x, %z
+  %min = select i1 %c2, i32 %z, i32 %x
+  %c = icmp ult i32 %max, %min
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @eqcmp1(i32 %x, i32 %y) {
+; CHECK: @eqcmp1
+  %c = icmp sge i32 %x, %y
+  %max = select i1 %c, i32 %x, i32 %y
+  %r = icmp eq i32 %max, %x
+  ret i1 %r
+; CHECK: ret i1 %c
+}
+
+define i1 @eqcmp2(i32 %x, i32 %y) {
+; CHECK: @eqcmp2
+  %c = icmp sge i32 %x, %y
+  %max = select i1 %c, i32 %x, i32 %y
+  %r = icmp eq i32 %x, %max
+  ret i1 %r
+; CHECK: ret i1 %c
+}
+
+define i1 @eqcmp3(i32 %x, i32 %y) {
+; CHECK: @eqcmp3
+  %c = icmp uge i32 %x, %y
+  %max = select i1 %c, i32 %x, i32 %y
+  %r = icmp eq i32 %max, %x
+  ret i1 %r
+; CHECK: ret i1 %c
+}
+
+define i1 @eqcmp4(i32 %x, i32 %y) {
+; CHECK: @eqcmp4
+  %c = icmp uge i32 %x, %y
+  %max = select i1 %c, i32 %x, i32 %y
+  %r = icmp eq i32 %x, %max
+  ret i1 %r
+; CHECK: ret i1 %c
+}
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 485114c..9695418 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -347,3 +347,40 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
+
+
+
+; PR9815 - This is a partial overlap case that cannot be safely transformed
+; into a memcpy.
+@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
+
+define i32 @test14() nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %add = add nsw i32 %tmp5, 4
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
+  %tmp2 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, 5
+  %idxprom5 = sext i32 %add4 to i64
+  %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
+  store i32 %tmp2, i32* %arrayidx6, align 4
+  %inc = add nsw i32 %tmp5, 1
+  %cmp = icmp slt i32 %inc, 2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc
+  %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
+  ret i32 %tmp8
+; CHECK: @test14
+; CHECK: for.body:
+; CHECK: load i32
+; CHECK: store i32
+; CHECK: br i1 %cmp
+
+}
+
+
diff --git a/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll b/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll
index 4032a59..4032a59 100644
--- a/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll
diff --git a/test/Transforms/LoopStrengthReduce/X86/dg.exp b/test/Transforms/LoopStrengthReduce/X86/dg.exp
new file mode 100644
index 0000000..7b7bd4e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
new file mode 100644
index 0000000..294c090
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -0,0 +1,91 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; PR9939
+
+; LSR should property handle the post-inc offset when folding the
+; non-IV operand of an icmp into the IV.
+
+; CHECK:   %tmp2 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   %tmp3 = lshr i64 %tmp2, 1
+; CHECK:   %tmp4 = mul i64 %tmp3, 2
+; CHECK:   br label %for.body
+; CHECK: for.body:
+; CHECK:   %lsr.iv5 = phi i64 [ %lsr.iv.next, %for.body ], [ %tmp4, %for.body.lr.ph ]
+; CHECK:   %lsr.iv.next = add i64 %lsr.iv5, -2
+; CHECK:   %lsr.iv.next6 = inttoptr i64 %lsr.iv.next to i16*
+; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next6, null
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Vector2 = type { i16*, [64 x i16], i32 }
+
+@.str = private unnamed_addr constant [37 x i8] c"0123456789abcdefghijklmnopqrstuvwxyz\00"
+
+define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2* nocapture %result) nounwind noinline {
+entry:
+  %buffer = alloca [33 x i16], align 16
+  %add.ptr = getelementptr inbounds [33 x i16]* %buffer, i64 0, i64 33
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %0 = phi i64 [ %indvar.next44, %do.body ], [ 0, %entry ]
+  %i.addr.0 = phi i32 [ %div, %do.body ], [ %i, %entry ]
+  %tmp51 = sub i64 32, %0
+  %incdec.ptr = getelementptr [33 x i16]* %buffer, i64 0, i64 %tmp51
+  %rem = urem i32 %i.addr.0, 10
+  %div = udiv i32 %i.addr.0, 10
+  %idxprom = zext i32 %rem to i64
+  %arrayidx = getelementptr inbounds [37 x i8]* @.str, i64 0, i64 %idxprom
+  %tmp5 = load i8* %arrayidx, align 1
+  %conv = sext i8 %tmp5 to i16
+  store i16 %conv, i16* %incdec.ptr, align 2
+  %1 = icmp ugt i32 %i.addr.0, 9
+  %indvar.next44 = add i64 %0, 1
+  br i1 %1, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.body
+  %xap.0 = inttoptr i64 %0 to i1*
+  %cap.0 = ptrtoint i1* %xap.0 to i64
+  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+  %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
+  %conv11 = trunc i64 %sub.ptr.div39 to i32
+  %mLength = getelementptr inbounds %struct.Vector2* %result, i64 0, i32 2
+  %idx.ext21 = bitcast i64 %sub.ptr.div39 to i64
+  %incdec.ptr.sum = add i64 %idx.ext21, -1
+  %cp.0.sum = sub i64 %incdec.ptr.sum, %0
+  %add.ptr22 = getelementptr [33 x i16]* %buffer, i64 1, i64 %cp.0.sum
+  %cmp2740 = icmp eq i64 %idx.ext21, 0
+  br i1 %cmp2740, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %do.end
+  %tmp16 = load i32* %mLength, align 4
+  %mBegin = getelementptr inbounds %struct.Vector2* %result, i64 0, i32 0
+  %tmp14 = load i16** %mBegin, align 8
+  %tmp48 = zext i32 %tmp16 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ]
+  %tmp46 = add i64 %tmp51, %indvar
+  %p.042 = getelementptr [33 x i16]* %buffer, i64 0, i64 %tmp46
+  %tmp47 = sub i64 %indvar, %0
+  %incdec.ptr32 = getelementptr [33 x i16]* %buffer, i64 1, i64 %tmp47
+  %tmp49 = add i64 %tmp48, %indvar
+  %dst.041 = getelementptr i16* %tmp14, i64 %tmp49
+  %tmp29 = load i16* %p.042, align 2
+  store i16 %tmp29, i16* %dst.041, align 2
+  %cmp27 = icmp eq i16* %incdec.ptr32, %add.ptr22
+  %indvar.next = add i64 %indvar, 1
+  br i1 %cmp27, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %do.end
+  %tmp38 = load i32* %mLength, align 4
+  %add = add i32 %tmp38, %conv11
+  store i32 %add, i32* %mLength, align 4
+  ret void
+}
diff --git a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
new file mode 100644
index 0000000..61c54dd
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
@@ -0,0 +1,28 @@
+; RUN: opt -loop-unswitch -disable-output
+; PR10031
+
+define i32 @test(i32 %command) {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then14, %tailrecurse, %entry
+  br i1 undef, label %if.then, label %tailrecurse
+
+if.then:                                          ; preds = %tailrecurse
+  switch i32 %command, label %sw.bb [
+    i32 2, label %land.lhs.true
+    i32 0, label %land.lhs.true
+  ]
+
+land.lhs.true:                                    ; preds = %if.then, %if.then
+  br i1 undef, label %sw.bb, label %if.then14
+
+if.then14:                                        ; preds = %land.lhs.true
+  switch i32 %command, label %tailrecurse [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %if.then14
+  unreachable
+}
diff --git a/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll b/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
new file mode 100644
index 0000000..132966e
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+; PR10067
+; Make sure the call+copy isn't optimized in such a way that
+; %ret ends up with the wrong value.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10"
+
+%struct1 = type { i32, i32 }
+%struct2 = type { %struct1, i8* }
+
+declare void @bar(%struct1* nocapture sret %agg.result) nounwind
+
+define i32 @foo() nounwind {
+  %x = alloca %struct1, align 8
+  %y = alloca %struct2, align 8
+  call void @bar(%struct1* sret %x) nounwind
+; CHECK: call void @bar(%struct1* sret %x)
+
+  %gepn1 = getelementptr inbounds %struct2* %y, i32 0, i32 0, i32 0
+  store i32 0, i32* %gepn1, align 8
+  %gepn2 = getelementptr inbounds %struct2* %y, i32 0, i32 0, i32 1
+  store i32 0, i32* %gepn2, align 4
+
+  %bit1 = bitcast %struct1* %x to i64*
+  %bit2 = bitcast %struct2* %y to i64*
+  %load = load i64* %bit1, align 8
+  store i64 %load, i64* %bit2, align 8
+
+; CHECK: %load = load i64* %bit1, align 8
+; CHECK: store i64 %load, i64* %bit2, align 8
+
+  %gep1 = getelementptr %struct2* %y, i32 0, i32 0, i32 0
+  %ret = load i32* %gep1
+  ret i32 %ret
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index b387d32..5c6a94c 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -109,3 +109,23 @@ define void @test6(i8 *%P) {
 ; CHECK-NEXT: ret void
 }
 
+
+; PR9794 - Should forward memcpy into byval argument even though the memcpy
+; isn't itself 8 byte aligned.
+%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+entry:
+  %agg.tmp = alloca %struct.p, align 4
+  %tmp = bitcast %struct.p* %agg.tmp to i8*
+  %tmp1 = bitcast %struct.p* %q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
+  %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+  ret i32 %call
+; CHECK: @test7
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+}
+
+declare i32 @g(%struct.p* byval align 8)
+
+
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index c66e150..e5b2ba4 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -O3 -S %s | FileCheck %s
+; XFAIL: *
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
diff --git a/test/Transforms/PhaseOrdering/dg.exp b/test/Transforms/PhaseOrdering/dg.exp
new file mode 100644
index 0000000..f200589
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
new file mode 100644
index 0000000..816cb60
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR9820
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16
+
+define i32* @noop(i32* %p_29) nounwind readnone {
+entry:
+  ret i32* %p_29
+}
+
+define i32 @main() nounwind {
+entry:
+  %l_10 = alloca [4 x i32], align 16
+  %tmp = bitcast [4 x i32]* %l_10 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
+; CHECK: call void @llvm.memcpy
+  %arrayidx = getelementptr inbounds [4 x i32]* %l_10, i64 0, i64 0
+  %call = call i32* @noop(i32* %arrayidx)
+  store i32 0, i32* %call
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
new file mode 100644
index 0000000..32e67fb
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%0 = type { <2 x float>, float }
+%struct.PointC3 = type { %struct.array }
+%struct.Point_3 = type { %struct.PointC3 }
+%struct.array = type { [3 x float], [4 x i8] }
+
+; CHECK: main
+; CHECK-NOT: alloca
+; CHECK: extractelement <2 x float> zeroinitializer
+
+define void @main() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca %0, align 16
+  %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3*
+  %0 = getelementptr %0* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  %1 = getelementptr inbounds %struct.Point_3* %tmpcast, i64 0, i32 0
+  %base.i.i.i = getelementptr inbounds %struct.PointC3* %1, i64 0, i32 0
+  %arrayidx.i.i.i.i = getelementptr inbounds %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
+  %tmp5.i.i = load float* %arrayidx.i.i.i.i, align 4
+  ret void
+}
+
+; CHECK: test1
+; CHECK-NOT: alloca
+; CHECK: extractelement <2 x float> zeroinitializer
+
+define void @test1() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca {<2 x float>, float}, align 16
+  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
+  %0 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  %tmp5.i.i = load float* %tmpcast, align 4
+  ret void
+}
+
+; CHECK: test2
+; CHECK-NOT: alloca
+; CHECK: and i128
+; CHECK: or i128
+; CHECK: trunc i128
+; CHECK-NOT: insertelement
+; CHECK-NOT: extractelement
+
+define float @test2() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca {<2 x float>, float}, align 16
+  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
+  %tmpcast2 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 1
+  %0 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  store float 1.0, float* %tmpcast2, align 4
+  %r1 = load float* %tmpcast, align 4
+  %r2 = load float* %tmpcast2, align 4
+  %r = fadd float %r1, %r2
+  ret float %r
+}
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
new file mode 100644
index 0000000..c149134
--- /dev/null
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.0"
+
+; CHECK: f
+; CHECK-NOT: llvm.dbg.declare
+; CHECK: llvm.dbg.value
+; CHECK: llvm.dbg.value
+; CHECK: llvm.dbg.value
+; CHECK: llvm.dbg.value
+; CHECK: llvm.dbg.value
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !6), !dbg !7
+  store i32 %b, i32* %b.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !8), !dbg !9
+  call void @llvm.dbg.declare(metadata !{i32* %c}, metadata !10), !dbg !12
+  %tmp = load i32* %a.addr, align 4, !dbg !13
+  store i32 %tmp, i32* %c, align 4, !dbg !13
+  %tmp1 = load i32* %a.addr, align 4, !dbg !14
+  %tmp2 = load i32* %b.addr, align 4, !dbg !14
+  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
+  store i32 %add, i32* %a.addr, align 4, !dbg !14
+  %tmp3 = load i32* %c, align 4, !dbg !15
+  %tmp4 = load i32* %b.addr, align 4, !dbg !15
+  %sub = sub nsw i32 %tmp3, %tmp4, !dbg !15
+  store i32 %sub, i32* %b.addr, align 4, !dbg !15
+  %tmp5 = load i32* %a.addr, align 4, !dbg !16
+  %tmp6 = load i32* %b.addr, align 4, !dbg !16
+  %add7 = add nsw i32 %tmp5, %tmp6, !dbg !16
+  ret i32 %add7, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1}
+
+!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 1, i32 11, metadata !1, null}
+!8 = metadata !{i32 590081, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 1, i32 18, metadata !1, null}
+!10 = metadata !{i32 590080, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 589835, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 2, i32 9, metadata !11, null}
+!13 = metadata !{i32 2, i32 14, metadata !11, null}
+!14 = metadata !{i32 3, i32 5, metadata !11, null}
+!15 = metadata !{i32 4, i32 5, metadata !11, null}
+!16 = metadata !{i32 5, i32 5, metadata !11, null}
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
index 6b8422c..ae2c6cc 100644
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ b/test/Transforms/ScalarRepl/debuginfo.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -scalarrepl -S | not grep alloca
+; RUN: opt < %s -scalarrepl-ssa -S | not grep alloca
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 	%llvm.dbg.anchor.type = type { i32, i32 }
 	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
diff --git a/test/Transforms/SimplifyCFG/PR9946.ll b/test/Transforms/SimplifyCFG/PR9946.ll
new file mode 100644
index 0000000..4a61b84
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR9946.ll
@@ -0,0 +1,18 @@
+; RUN: opt  %s -simplifycfg -disable-output
+
+@foo = external constant i32
+
+define i32 @f() {
+entry:
+  br i1 icmp eq (i64 and (i64 ptrtoint (i32* @foo to i64), i64 15), i64 0), label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %storemerge = phi i32 [ 1, %if.end ], [ 0, %if.then ]
+  ret i32 %storemerge
+}
diff --git a/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll b/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll
new file mode 100644
index 0000000..3996efd
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S <%s -simplifycfg | FileCheck %s
+
+define void @test_br(i32 %x) {
+entry:
+; CHECK: @test_br
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+  %cmp = icmp eq i32 %x, 10
+  br i1 %cmp, label %if.then, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+define void @test_switch(i32 %x) nounwind {
+entry:
+; CHECK: @test_switch
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+  %rem = srem i32 %x, 3
+  switch i32 %rem, label %sw.bb [
+    i32 1, label %sw.bb
+    i32 10, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %sw.default, %entry, %entry
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb
+  ret void
+}
+
+define void @test_indirectbr(i32 %x) {
+entry:
+; CHECK: @test_indirectbr
+; CHECK-NEXT: entry:
+; Ideally this should now check:
+;   CHK-NEXT: ret void
+; But that doesn't happen yet. Instead:
+; CHECK-NEXT: br label %L1
+
+  %label = bitcast i8* blockaddress(@test_indirectbr, %L1) to i8*
+  indirectbr i8* %label, [label %L1, label %L2]
+
+L1:                                               ; preds = %entry
+  ret void
+L2:                                               ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll
index 7fb4def..7853e9a 100644
--- a/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -180,3 +180,72 @@ L3:
 ; before SimplifyCFG even looks at the indirectbr.
   indirectbr i8* %anchor, [label %L1, label %L2]
 }
+
+; PR10072
+
+@xblkx.bbs = internal unnamed_addr constant [9 x i8*] [i8* blockaddress(@indbrtest7, %xblkx.begin), i8* blockaddress(@indbrtest7, %xblkx.begin3), i8* blockaddress(@indbrtest7, %xblkx.begin4), i8* blockaddress(@indbrtest7, %xblkx.begin5), i8* blockaddress(@indbrtest7, %xblkx.begin6), i8* blockaddress(@indbrtest7, %xblkx.begin7), i8* blockaddress(@indbrtest7, %xblkx.begin8), i8* blockaddress(@indbrtest7, %xblkx.begin9), i8* blockaddress(@indbrtest7, %xblkx.end)]
+
+define void @indbrtest7() {
+escape-string.top:
+  %xval202x = call i32 @xfunc5x()
+  br label %xlab5x
+
+xlab8x:                                           ; preds = %xlab5x
+  %xvaluex = call i32 @xselectorx()
+  %xblkx.x = getelementptr [9 x i8*]* @xblkx.bbs, i32 0, i32 %xvaluex
+  %xblkx.load = load i8** %xblkx.x
+  indirectbr i8* %xblkx.load, [label %xblkx.begin, label %xblkx.begin3, label %xblkx.begin4, label %xblkx.begin5, label %xblkx.begin6, label %xblkx.begin7, label %xblkx.begin8, label %xblkx.begin9, label %xblkx.end]
+
+xblkx.begin:
+  br label %xblkx.end
+
+xblkx.begin3:
+  br label %xblkx.end
+
+xblkx.begin4:
+  br label %xblkx.end
+
+xblkx.begin5:
+  br label %xblkx.end
+
+xblkx.begin6:
+  br label %xblkx.end
+
+xblkx.begin7:
+  br label %xblkx.end
+
+xblkx.begin8:
+  br label %xblkx.end
+
+xblkx.begin9:
+  br label %xblkx.end
+
+xblkx.end:
+  %yes.0 = phi i1 [ false, %xblkx.begin ], [ true, %xlab8x ], [ false, %xblkx.begin9 ], [ false, %xblkx.begin8 ], [ false, %xblkx.begin7 ], [ false, %xblkx.begin6 ], [ false, %xblkx.begin5 ], [ true, %xblkx.begin4 ], [ false, %xblkx.begin3 ]
+  br i1 %yes.0, label %v2j, label %xlab17x
+
+v2j:
+; CHECK: %xunusedx = call i32 @xactionx()
+  %xunusedx = call i32 @xactionx()
+  br label %xlab4x
+
+xlab17x:
+  br label %xlab4x
+
+xlab4x:
+  %incr19 = add i32 %xval704x.0, 1
+  br label %xlab5x
+
+xlab5x:
+  %xval704x.0 = phi i32 [ 0, %escape-string.top ], [ %incr19, %xlab4x ]
+  %xval10x = icmp ult i32 %xval704x.0, %xval202x
+  br i1 %xval10x, label %xlab8x, label %xlab9x
+
+xlab9x:
+  ret void
+}
+
+declare i32 @xfunc5x()
+declare i8 @xfunc7x()
+declare i32 @xselectorx()
+declare i32 @xactionx()
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
new file mode 100644
index 0000000..fc83ec2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+  %i = shl i32 %x, 1
+  switch i32 %i, label %a [
+    i32 21, label %b
+    i32 24, label %c
+  ]
+
+a:
+  ret i32 0
+b:
+  ret i32 3
+c:
+  ret i32 5
+; CHECK: @test1
+; CHECK: %cond = icmp eq i32 %i, 24
+; CHECK: %merge = select i1 %cond, i32 5, i32 0
+; CHECK: ret i32 %merge
+}
+
+
+define i32 @test2(i32 %x) nounwind {
+  %i = shl i32 %x, 1
+  switch i32 %i, label %a [
+    i32 21, label %b
+    i32 23, label %c
+  ]
+
+a:
+  ret i32 0
+b:
+  ret i32 3
+c:
+  ret i32 5
+; CHECK: @test2
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/TailCallElim/setjmp.ll b/test/Transforms/TailCallElim/setjmp.ll
new file mode 100644
index 0000000..7b7fe56
--- /dev/null
+++ b/test/Transforms/TailCallElim/setjmp.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
+
+; Test that we don't tail call in a functions that calls setjmp.
+
+; CHECK-NOT: tail call void @bar()
+
+define void @foo(i32* %x) {
+bb:
+  %tmp75 = tail call i32 @setjmp(i32* %x)
+  call void @bar()
+  ret void
+}
+
+declare i32 @setjmp(i32*)
+
+declare void @bar()
diff --git a/test/Transforms/TailDup/X86/dg.exp b/test/Transforms/TailDup/X86/dg.exp
new file mode 100644
index 0000000..7b7bd4e
--- /dev/null
+++ b/test/Transforms/TailDup/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/Transforms/TailDup/if-tail-dup.ll b/test/Transforms/TailDup/X86/if-tail-dup.ll
index 2e4f5be..2e4f5be 100644
--- a/test/Transforms/TailDup/if-tail-dup.ll
+++ b/test/Transforms/TailDup/X86/if-tail-dup.ll
author	dim <dim@FreeBSD.org>	2011-06-12 15:42:51 +0000
committer	dim <dim@FreeBSD.org>	2011-06-12 15:42:51 +0000
commit	ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree	b3032e51d630e8070e9e08d6641648f195316a80 /test
parent	2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
download	FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.zip FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.tar.gz