Diffstat (limited to 'test/CodeGen/X86')
24 files changed, 941 insertions, 33 deletions
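The recurring fix in the older tests below is the return type of the SSE2 pack intrinsics: packssdw.128 takes two <4 x i32> operands and produces the narrowed <8 x i16>, and packuswb.128 takes two <8 x i16> operands and produces <16 x i8>, so declarations and call sites written with the wider input type as the result were incorrect. A minimal standalone sketch of the corrected shape (illustrative only, not part of the commit; the wrapper function is invented):

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)

; Narrows eight 32-bit lanes into eight 16-bit lanes with signed saturation.
define <8 x i16> @pack_words(<4 x i32> %a, <4 x i32> %b) nounwind {
  %r = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %r
}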
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll index c106f57..3f67097 100644 --- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll +++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll @@ -11,7 +11,7 @@ target triple = "i686-apple-darwin8.6.1" declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) -declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) @@ -33,8 +33,8 @@ cond_false183: ; preds = %cond_false, %entry %tmp337 = bitcast <4 x i32> %tmp336 to <4 x float> ; <<4 x float>> [#uses=1] %tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 ) ; <<4 x float>> [#uses=1] %tmp379 = bitcast <4 x float> %tmp378 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp388 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1] - %tmp392 = bitcast <4 x i32> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1] + %tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp399 = extractelement <8 x i16> %tmp392, i32 7 ; <i16> [#uses=1] %tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7 ; <<8 x i16>> [#uses=1] %tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8> ; <<16 x i8>> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll index 49f3a95..b045329 100644 --- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll +++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll @@ -17,8 +17,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1] %tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp98 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] - %tmp102 = bitcast <4 x i32> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] + %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1] @@ -32,8 +32,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1] %tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp159 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] - %tmp163 = bitcast <4 x i32> %tmp159 to <8 x i16> ; <<8 x i16>> 
[#uses=1] + %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] + %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1] @@ -47,8 +47,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1] %tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp220 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] - %tmp224 = bitcast <4 x i32> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] + %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1] @@ -62,8 +62,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1] %tmp270 = bitcast <4 x float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp281 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] - %tmp285 = bitcast <4 x i32> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] + %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1] @@ -73,4 +73,4 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) -declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll index 989dfc5..b27ef83 100644 --- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll +++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 ; RUN: llc < %s 
-march=x86 -mattr=+sse2 | not grep punpckhwd -declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) @@ -13,8 +13,8 @@ define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32 %tmp805 = add <4 x i32> %tmp777, zeroinitializer %tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16> %tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > ) - %tmp1020 = tail call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 ) - %tmp1030 = bitcast <8 x i16> %tmp1020 to <4 x i32> + %tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 ) + %tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32> %tmp1033 = add <4 x i32> zeroinitializer, %tmp1030 %tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64> %tmp1049 = or <2 x i64> %tmp1048, zeroinitializer diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll index 83eb61a..2aea9c5 100644 --- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll +++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local +; PR5534 %struct.CGPoint = type { double, double } %struct.NSArray = type { %struct.NSObject } diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll new file mode 100644 index 0000000..8c62f4d --- /dev/null +++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Spill" +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Folded Spill" +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Reload" + + %struct..0anon = type { i32 } + %struct.rtvec_def = type { i32, [1 x %struct..0anon] } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } +@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1] +@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1] + +declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) + +define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { +entry: + %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] + br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next + +cond_next: ; preds = %entry + %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp7 = load i16* %tmp6 ; <i16> [#uses=2] + %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2] + %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1] + br i1 %tmp10, label %cond_true13, label %cond_next32 + +cond_true13: ; preds = %cond_next + %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1] + %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1] + %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp21 = load i16* %tmp20 ; <i16> [#uses=1] + %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1] + br i1 %tmp22, label %cond_true25, label %cond_next32 + +cond_true25: ; preds = %cond_true13 + %tmp29 = tail call 
%struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %tmp29 + +cond_next32: ; preds = %cond_true13, %cond_next + %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1] + %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1] + %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1] + %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1] + %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2] + %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1] + br i1 %tmp12513, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %bb123, %cond_next32 + %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2] + %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5] + %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2] + %tmp43 = load i8* %tmp42 ; <i8> [#uses=1] + switch i8 %tmp43, label %bb123 [ + i8 101, label %cond_true47 + i8 69, label %bb105.preheader + ] + +cond_true47: ; preds = %bb + %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1] + %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1] + %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1] + store i32 %tmp58.c, i32* %tmp62 + %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1] + %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1] + br i1 %tmp6917, label %bb105.preheader, label %bb123 + +bb105.preheader: ; preds = %cond_true47, %bb + %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3] + %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1] + %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1] + br i1 %tmp11625, label %bb123, label %bb73 + +bb73: ; preds = %bb73, %bb105.preheader + %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3] + %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2] + %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1] + %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1] + %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1] + %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1] + store i32 %tmp98.c, i32* %tmp101 + %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2] + %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp115 = load i32* %tmp114 ; <i32> [#uses=1] + %tmp116 = 
icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1] + br i1 %tmp116, label %bb73, label %bb123 + +bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb + %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1] + %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1] + %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp125, label %bb, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry + %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %UnifiedRetVal +} diff --git a/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll new file mode 100644 index 0000000..5398eef --- /dev/null +++ b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll @@ -0,0 +1,133 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim +; rdar://7394770 + +%struct.JVTLib_100487 = type <{ i8 }> + +define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp { +bb.nph: + %0 = shl i32 undef, 1 ; <i32> [#uses=2] + %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1] + br label %bb + +bb: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb.i, label %bb1.i + +bb2: ; preds = %bb + unreachable + +bb.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +bb1.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2] + %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1] + %mask241.masked = or i256 undef, undef ; <i256> [#uses=1] + %ins237 = or i256 undef, 0 ; <i256> [#uses=1] + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb5 + br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +bb12.i: ; preds = %bb9 + br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit: ; preds = %bb12.i, %bb9 + ret i32 undef + +bb10: ; preds = %bb5 + %1 = sext i16 undef to i32 ; <i32> [#uses=1] + %2 = sext i16 undef to i32 ; <i32> [#uses=1] + %3 = sext i16 undef to i32 ; <i32> [#uses=1] + %4 = sext i16 undef to i32 ; <i32> [#uses=1] + %5 = sext i16 undef to i32 ; <i32> [#uses=1] + %6 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0] + %7 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1] + %tmp209 = trunc i256 %tmp208 to i16 ; <i16> [#uses=1] + %8 = sext i16 %tmp209 to i32 ; <i32> [#uses=1] + %9 = sext i16 undef to i32 ; <i32> [#uses=1] + %10 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp193 = lshr i256 %mask241.masked, 208 ; <i256> 
[#uses=1] + %tmp194 = trunc i256 %tmp193 to i16 ; <i16> [#uses=1] + %11 = sext i16 %tmp194 to i32 ; <i32> [#uses=1] + %tmp187 = lshr i256 %ins237, 240 ; <i256> [#uses=1] + %tmp188 = trunc i256 %tmp187 to i16 ; <i16> [#uses=1] + %12 = sext i16 %tmp188 to i32 ; <i32> [#uses=1] + %13 = add nsw i32 %4, %1 ; <i32> [#uses=1] + %14 = add nsw i32 %5, 0 ; <i32> [#uses=1] + %15 = add nsw i32 %6, %2 ; <i32> [#uses=1] + %16 = add nsw i32 %7, %3 ; <i32> [#uses=1] + %17 = add nsw i32 0, %8 ; <i32> [#uses=1] + %18 = add nsw i32 %11, %9 ; <i32> [#uses=1] + %19 = add nsw i32 0, %10 ; <i32> [#uses=1] + %20 = add nsw i32 %12, 0 ; <i32> [#uses=1] + %21 = add nsw i32 %17, %13 ; <i32> [#uses=2] + %22 = add nsw i32 %18, %14 ; <i32> [#uses=2] + %23 = add nsw i32 %19, %15 ; <i32> [#uses=2] + %24 = add nsw i32 %20, %16 ; <i32> [#uses=2] + %25 = add nsw i32 %22, %21 ; <i32> [#uses=2] + %26 = add nsw i32 %24, %23 ; <i32> [#uses=2] + %27 = sub i32 %21, %22 ; <i32> [#uses=1] + %28 = sub i32 %23, %24 ; <i32> [#uses=1] + %29 = add nsw i32 %26, %25 ; <i32> [#uses=1] + %30 = sub i32 %25, %26 ; <i32> [#uses=1] + %31 = sub i32 %27, %28 ; <i32> [#uses=1] + %32 = ashr i32 %29, 1 ; <i32> [#uses=2] + %33 = ashr i32 %30, 1 ; <i32> [#uses=2] + %34 = ashr i32 %31, 1 ; <i32> [#uses=2] + %35 = icmp sgt i32 %32, 32767 ; <i1> [#uses=1] + %o0_0.0.i = select i1 %35, i32 32767, i32 %32 ; <i32> [#uses=2] + %36 = icmp slt i32 %o0_0.0.i, -32768 ; <i1> [#uses=1] + %37 = icmp sgt i32 %33, 32767 ; <i1> [#uses=1] + %o1_0.0.i = select i1 %37, i32 32767, i32 %33 ; <i32> [#uses=2] + %38 = icmp slt i32 %o1_0.0.i, -32768 ; <i1> [#uses=1] + %39 = icmp sgt i32 %34, 32767 ; <i1> [#uses=1] + %o2_0.0.i = select i1 %39, i32 32767, i32 %34 ; <i32> [#uses=2] + %40 = icmp slt i32 %o2_0.0.i, -32768 ; <i1> [#uses=1] + %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1] + %41 = trunc i32 %o0_0.0.i to i16 ; <i16> [#uses=1] + %tmp358 = select i1 %36, i16 -32768, i16 %41 ; <i16> [#uses=2] + %42 = trunc i32 %o1_0.0.i to i16 ; <i16> [#uses=1] + %tmp347 = select i1 %38, i16 -32768, i16 %42 ; <i16> [#uses=1] + %43 = trunc i32 %o2_0.0.i to i16 ; <i16> [#uses=1] + %tmp335 = select i1 %40, i16 -32768, i16 %43 ; <i16> [#uses=1] + %44 = icmp sgt i16 %tmp358, -1 ; <i1> [#uses=2] + %..i24 = select i1 %44, i16 %tmp358, i16 undef ; <i16> [#uses=1] + %45 = icmp sgt i16 %tmp347, -1 ; <i1> [#uses=1] + %46 = icmp sgt i16 %tmp335, -1 ; <i1> [#uses=1] + %47 = zext i16 %..i24 to i32 ; <i32> [#uses=1] + %tmp = trunc i640 %tmp101 to i32 ; <i32> [#uses=1] + %48 = and i32 %tmp, 65535 ; <i32> [#uses=2] + %49 = mul i32 %47, %48 ; <i32> [#uses=1] + %50 = zext i16 undef to i32 ; <i32> [#uses=1] + %51 = mul i32 %50, %48 ; <i32> [#uses=1] + %52 = add i32 %49, %0 ; <i32> [#uses=1] + %53 = add i32 %51, %0 ; <i32> [#uses=1] + %54 = lshr i32 %52, undef ; <i32> [#uses=1] + %55 = lshr i32 %53, undef ; <i32> [#uses=1] + %56 = trunc i32 %54 to i16 ; <i16> [#uses=1] + %57 = trunc i32 %55 to i16 ; <i16> [#uses=1] + %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1] + %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1] + %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1] + br i1 undef, label %bb129.i, label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +bb129.i: ; preds = %bb10 + br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit: ; preds = %bb129.i, %bb10 + %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0] + %59 = phi 
i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0] + store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2 + unreachable +} diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll new file mode 100644 index 0000000..a7c2020 --- /dev/null +++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7395200 + +@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4] + +define void @foo(i32 %n, float* nocapture %x) nounwind ssp { +entry: +; CHECK: foo: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry +; CHECK: movq _g@GOTPCREL(%rip), %rcx + %tmp = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: LBB1_2: + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2] + %tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4] + %tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1] + %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1] + %tmp1117 = or i64 %tmp9, 2 ; <i64> [#uses=1] + %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1] + %tmp1318 = or i64 %tmp9, 3 ; <i64> [#uses=1] + %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1] + %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1] + %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1] + store float %1, float* %x_addr.03, align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1] + store float %2, float* %scevgep, align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1] + store float %3, float* %scevgep12, align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1] + store float %4, float* %scevgep14, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll new file mode 100644 index 0000000..3ce9edb --- /dev/null +++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7396984 + +@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1 + +define void @t(i32 %count) ssp nounwind { +entry: +; CHECK: t: +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) +; CHECK: movups L_str(%rip), %xmm0 + %tmp0 = alloca [60 x i8], align 1 + %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0 + br label %bb1 + +bb1: +; CHECK: LBB1_1: +; CHECK: movaps %xmm0, (%rsp) + %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ] + call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1) + %tmp3 = add i32 %tmp2, 1 + %tmp4 = icmp eq i32 %tmp3, %count + br i1 %tmp4, label %bb2, label %bb1 + +bb2: + ret void +} + +declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll new file mode 100644 index 0000000..5c1a2bc --- /dev/null +++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll @@ 
-0,0 +1,52 @@ +; RUN: llc -O3 < %s +; This test fails with: +; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } +%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } +%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" } +%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } + +define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 { +entry: + br i1 undef, label %bb56, label %bb27.outer + +bb8: ; preds = %bb27.outer108, %bb13 + switch i8 undef, label %bb27.outer [ + i8 35, label %bb56 + i8 37, label %bb14 + i8 38, label %bb10 + i8 42, label %bb56 + ] + +bb27.outer: ; preds = %bb8, %entry + %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2] + br label %bb27.outer108 + +bb10: ; preds = %bb8 + %toBool = icmp eq i8 0, 0 ; <i1> [#uses=1] + %or.cond = and i1 undef, %toBool ; <i1> [#uses=1] + br i1 
%or.cond, label %bb13, label %bb56 + +bb13: ; preds = %bb10 + br i1 undef, label %bb27.outer108, label %bb8 + +bb14: ; preds = %bb8 + ret i8 1 + +bb27.outer108: ; preds = %bb13, %bb27.outer + %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1] + %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0] + br label %bb8 + +bb56: ; preds = %bb10, %bb8, %bb8, %entry + ret i8 1 +} diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll new file mode 100644 index 0000000..633995d --- /dev/null +++ b/test/CodeGen/X86/bigstructret.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: grep "movl .24601, 12(%ecx)" %t +; RUN: grep "movl .48, 8(%ecx)" %t +; RUN: grep "movl .24, 4(%ecx)" %t +; RUN: grep "movl .12, (%ecx)" %t + +%0 = type { i32, i32, i32, i32 } + +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + %3 = insertvalue %0 %2, i32 24601, 3 + ret %0 %3 +} + diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll index de89374..4878448 100644 --- a/test/CodeGen/X86/cmp0.ll +++ b/test/CodeGen/X86/cmp0.ll @@ -1,7 +1,24 @@ -; RUN: llc < %s -march=x86-64 | grep -v cmp +; RUN: llc < %s -march=x86-64 | FileCheck %s -define i64 @foo(i64 %x) { +define i64 @test0(i64 %x) nounwind { %t = icmp eq i64 %x, 0 %r = zext i1 %t to i64 ret i64 %r +; CHECK: test0: +; CHECK: testq %rdi, %rdi +; CHECK: sete %al +; CHECK: movzbl %al, %eax +; CHECK: ret } + +define i64 @test1(i64 %x) nounwind { + %t = icmp slt i64 %x, 1 + %r = zext i1 %t to i64 + ret i64 %r +; CHECK: test1: +; CHECK: testq %rdi, %rdi +; CHECK: setle %al +; CHECK: movzbl %al, %eax +; CHECK: ret +} + diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-5.ll new file mode 100644 index 0000000..88fae37 --- /dev/null +++ b/test/CodeGen/X86/hidden-vis-5.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s +; <rdar://problem/7383328> + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] + +define hidden void @func() nounwind ssp { +entry: + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i32 @puts(i8*) + +define hidden i32 @main() nounwind ssp { +entry: + %retval = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @func() nounwind + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +; CHECK: .private_extern _func.eh +; CHECK: .private_extern _main.eh diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index c0379d1..ec5236b 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -74,16 +74,16 @@ exit: ; CHECK: yet_more_involved: ; CHECK: jmp .LBB3_1 ; CHECK-NEXT: align -; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: call bar99 ; CHECK-NEXT: call get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jg .LBB3_5 +; CHECK-NEXT: jg .LBB3_6 ; CHECK-NEXT: call block_a_true_func -; CHECK-NEXT: jmp .LBB3_6 -; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: jmp .LBB3_7 ; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: call 
block_a_merge_func ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: call body diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll index a1f38a7..9b53adb 100644 --- a/test/CodeGen/X86/loop-strength-reduce2.ll +++ b/test/CodeGen/X86/loop-strength-reduce2.ll @@ -4,7 +4,7 @@ @flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1] -define void @test(i32 %k, i32 %i) { +define void @test(i32 %k, i32 %i) nounwind { entry: %k_addr.012 = shl i32 %i, 1 ; <i32> [#uses=1] %tmp14 = icmp sgt i32 %k_addr.012, 8192 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll index e340edd..c45a374 100644 --- a/test/CodeGen/X86/loop-strength-reduce3.ll +++ b/test/CodeGen/X86/loop-strength-reduce3.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 | grep cmp | grep 240 ; RUN: llc < %s -march=x86 | grep inc | count 1 -define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) { +define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind { entry: %tmp2955 = icmp sgt i32 %C, 0 ; <i1> [#uses=1] br i1 %tmp2955, label %bb26.outer.us, label %bb40.split diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll index 4ec2a02..b07eeb6 100644 --- a/test/CodeGen/X86/loop-strength-reduce5.ll +++ b/test/CodeGen/X86/loop-strength-reduce5.ll @@ -3,7 +3,7 @@ @X = weak global i16 0 ; <i16*> [#uses=1] @Y = weak global i16 0 ; <i16*> [#uses=1] -define void @foo(i32 %N) { +define void @foo(i32 %N) nounwind { entry: %tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] br i1 %tmp1019, label %bb, label %return diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll index 81da82e..bbafcf7c 100644 --- a/test/CodeGen/X86/loop-strength-reduce6.ll +++ b/test/CodeGen/X86/loop-strength-reduce6.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 | not grep inc -define fastcc i32 @decodeMP3(i32 %isize, i32* %done) { +define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { entry: br i1 false, label %cond_next191, label %cond_true189 diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll new file mode 100644 index 0000000..3f90245 --- /dev/null +++ b/test/CodeGen/X86/object-size.ll @@ -0,0 +1,55 @@ +; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64 + +; ModuleID = 'ts.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0" + +@p = common global i8* null, align 8 ; <i8**> [#uses=4] +@.str = private constant [3 x i8] c"Hi\00" ; <[3 x i8]*> [#uses=1] + +define void @bar() nounwind ssp { +entry: + %tmp = load i8** @p ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i32 0) ; <i64> [#uses=1] + %cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1] +; X64: movq $-1, %rax +; X64: cmpq $-1, %rax + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp1 = load i8** @p ; <i8*> [#uses=1] + %tmp2 = load i8** @p ; <i8*> [#uses=1] + %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1] + br label %cond.end + +cond.false: ; preds = %entry + %tmp3 = load i8** @p ; <i8*> [#uses=1] + %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; 
<i8*> [#uses=1] + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0] + ret void +} + +declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly + +declare i8* @__strcpy_chk(i8*, i8*, i64) ssp + +define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp { +entry: + %retval = alloca i8* ; <i8**> [#uses=2] + %__dest.addr = alloca i8* ; <i8**> [#uses=3] + %__src.addr = alloca i8* ; <i8**> [#uses=2] + store i8* %__dest, i8** %__dest.addr + store i8* %__src, i8** %__src.addr + %tmp = load i8** %__dest.addr ; <i8*> [#uses=1] + %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1] + %tmp2 = load i8** %__dest.addr ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1] + store i8* %call, i8** %retval + %1 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %1 +} diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 6319cb8..21c1a3c 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -145,7 +145,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { ret void ; X64: t9: ; X64: movsd (%rsi), %xmm0 -; X64: movhps %xmm0, (%rdi) +; X64: movaps (%rdi), %xmm1 +; X64: movlhps %xmm0, %xmm1 +; X64: movaps %xmm1, (%rdi) ; X64: ret } diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index 0f32a50..d762392 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t ; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6 -; RUN: grep asm-printer %t | grep 177 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll new file mode 100644 index 0000000..0d86e56 --- /dev/null +++ b/test/CodeGen/X86/tail-opts.ll @@ -0,0 +1,408 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s + +declare void @bar(i32) +declare void @car(i32) +declare void @dar(i32) +declare void @ear(i32) +declare void @far(i32) +declare i1 @qux() + +@GHJK = global i32 0 +@HABC = global i32 0 + +; BranchFolding should tail-merge the stores since they all precede +; direct branches to the same place. + +; CHECK: tail_merge_me: +; CHECK-NOT: GHJK +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: movl $1, HABC(%rip) +; CHECK-NOT: GHJK + +define void @tail_merge_me() nounwind { +entry: + %a = call i1 @qux() + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + store i32 1, i32* @HABC + %c = call i1 @qux() + br i1 %c, label %return, label %altret + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +declare i8* @choose(i8*, i8*); + +; BranchFolding should tail-duplicate the indirect jump to avoid +; redundant branching. 
+ +; CHECK: tail_duplicate_me: +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx + +define void @tail_duplicate_me() nounwind { +entry: + %a = call i1 @qux() + %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return), + i8* blockaddress(@tail_duplicate_me, %altret)) + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + indirectbr i8* %c, [label %return, label %altret] + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +; BranchFolding shouldn't try to merge the tails of two blocks +; with only a branch in common, regardless of the fallthrough situation. + +; CHECK: dont_merge_oddly: +; CHECK-NOT: ret +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe .LBB3_3 +; CHECK-NEXT: ucomiss %xmm2, %xmm0 +; CHECK-NEXT: ja .LBB3_4 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: ucomiss %xmm2, %xmm1 +; CHECK-NEXT: jbe .LBB3_2 +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: ret + +define i1 @dont_merge_oddly(float* %result) nounwind { +entry: + %tmp4 = getelementptr float* %result, i32 2 + %tmp5 = load float* %tmp4, align 4 + %tmp7 = getelementptr float* %result, i32 4 + %tmp8 = load float* %tmp7, align 4 + %tmp10 = getelementptr float* %result, i32 6 + %tmp11 = load float* %tmp10, align 4 + %tmp12 = fcmp olt float %tmp8, %tmp11 + br i1 %tmp12, label %bb, label %bb21 + +bb: + %tmp23469 = fcmp olt float %tmp5, %tmp8 + br i1 %tmp23469, label %bb26, label %bb30 + +bb21: + %tmp23 = fcmp olt float %tmp5, %tmp11 + br i1 %tmp23, label %bb26, label %bb30 + +bb26: + ret i1 0 + +bb30: + ret i1 1 +} + +; Do any-size tail-merging when two candidate blocks will both require +; an unconditional jump to complete a two-way conditional branch. 
+ +; CHECK: c_expand_expr_stmt: +; CHECK: jmp .LBB4_7 +; CHECK-NEXT: .LBB4_12: +; CHECK-NEXT: movq 8(%rax), %rax +; CHECK-NEXT: movb 16(%rax), %al +; CHECK-NEXT: cmpb $16, %al +; CHECK-NEXT: je .LBB4_6 +; CHECK-NEXT: cmpb $23, %al +; CHECK-NEXT: je .LBB4_6 +; CHECK-NEXT: jmp .LBB4_15 +; CHECK-NEXT: .LBB4_14: +; CHECK-NEXT: cmpb $23, %bl +; CHECK-NEXT: jne .LBB4_15 +; CHECK-NEXT: .LBB4_15: + +%0 = type { %struct.rtx_def* } +%struct.lang_decl = type opaque +%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] } +%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* } +%union..2anon = type { i32 } +%union.rtunion = type { i8* } +%union.tree_node = type { %struct.tree_decl } + +define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { +entry: + %tmp4 = load i8* null, align 8 ; <i8> [#uses=3] + switch i8 %tmp4, label %bb3 [ + i8 18, label %bb + ] + +bb: ; preds = %entry + switch i32 undef, label %bb1 [ + i32 0, label %bb2.i + i32 37, label %bb.i + ] + +bb.i: ; preds = %bb + switch i32 undef, label %bb1 [ + i32 0, label %lvalue_p.exit + ] + +bb2.i: ; preds = %bb + br label %bb3 + +lvalue_p.exit: ; preds = %bb.i + %tmp21 = load %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3] + %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1] + %tmp23 = load i8* %tmp22, align 8 ; <i8> [#uses=1] + %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1] + switch i32 %tmp24, label %lvalue_p.exit4 [ + i32 0, label %bb2.i3 + i32 2, label %bb.i1 + ] + +bb.i1: ; preds = %lvalue_p.exit + %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1] + %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2] + %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp29 = load i8* %tmp28, align 8 ; <i8> [#uses=1] + %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1] + switch i32 %tmp30, label %lvalue_p.exit4 [ + i32 0, label %bb2.i.i2 + i32 2, label %bb.i.i + ] + +bb.i.i: ; preds = %bb.i1 + %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1] + %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1] + br label %lvalue_p.exit4 + +bb2.i.i2: ; preds = %bb.i1 + %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1] + %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp39 = load i8* %tmp38, align 8 ; <i8> [#uses=1] + switch i8 %tmp39, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +bb2.i3: ; preds = %lvalue_p.exit + %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1] + %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 
0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp44 = load i8* %tmp43, align 8 ; <i8> [#uses=1] + switch i8 %tmp44, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit + %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1] + %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1] + br i1 %or.cond, label %bb2, label %bb3 + +bb1: ; preds = %bb2.i.i, %bb.i, %bb + %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1] + br i1 %.old, label %bb2, label %bb3 + +bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry + %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0] + unreachable +} + +declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly + +declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind + + +; If one tail merging candidate falls through into the other, +; tail merging is likely profitable regardless of how few +; instructions are involved. This function should have only +; one ret instruction. + +; CHECK: foo: +; CHECK: call func +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: ret + +define void @foo(i1* %V) nounwind { +entry: + %t0 = icmp eq i1* %V, null + br i1 %t0, label %return, label %bb + +bb: + call void @func() + ret void + +return: + ret void +} + +declare void @func() + +; one - One instruction may be tail-duplicated even with optsize. + +; CHECK: one: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) + +@XYZ = external global i32 + +define void @one() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + unreachable + +return: + ret void +} + +; two - Same as one, but with two instructions in the common +; tail instead of one. This is too much to be merged, given +; the optsize attribute. + +; CHECK: two: +; CHECK-NOT: XYZ +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK-NOT: XYZ +; CHECK: ret + +define void @two() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} + +; two_nosize - Same as two, but without the optsize attribute. +; Now two instructions are enough to be tail-duplicated. 
+ +; CHECK: two_nosize: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) + +define void @two_nosize() nounwind { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll index a4f87c0..4923df2 100644 --- a/test/CodeGen/X86/tailcall1.ll +++ b/test/CodeGen/X86/tailcall1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 4 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { entry: ret i32 %a3 @@ -9,3 +9,24 @@ entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] ret i32 %tmp11 } + +declare fastcc i8* @alias_callee() + +define fastcc noalias i8* @noalias_caller() nounwind { + %p = tail call fastcc i8* @alias_callee() + ret i8* %p +} + +declare fastcc noalias i8* @noalias_callee() + +define fastcc i8* @alias_caller() nounwind { + %p = tail call fastcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare fastcc i32 @i32_callee() + +define fastcc i32 @ret_undef() nounwind { + %p = tail call fastcc i32 @i32_callee() + ret i32 undef +} diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll index 556f103..f4930b0 100644 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ b/test/CodeGen/X86/vec_shuffle-3.ll @@ -18,4 +18,3 @@ entry: %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp4 } - diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll index e42b538..cdb030e 100644 --- a/test/CodeGen/X86/vec_zero-2.ll +++ b/test/CodeGen/X86/vec_zero-2.ll @@ -12,8 +12,8 @@ bb4743: ; preds = %bb1664 %tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer ; <<8 x i16>> [#uses=1] %tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64> ; <<2 x i64>> [#uses=1] %tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp5266 = call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp5267 = bitcast <8 x i16> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1] %tmp5294 = and <2 x i64> zeroinitializer, %tmp5267 ; <<2 x i64>> [#uses=1] br label %bb5310 bb5310: ; preds = %bb4743, %bb1664 @@ -21,4 +21,4 @@ bb5310: ; preds = %bb4743, %bb1664 ret i32 0 } -declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone |
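As context for the cmp0.ll change above: the old RUN line piped llc output through grep -v cmp, which succeeds whenever any output line lacks the string "cmp", so it verified almost nothing; the FileCheck version pins down the exact instruction sequence instead. The new test1 case also relies on the identity that a signed x < 1 is the same predicate as x <= 0, which lets the backend use testq (an and of a register with itself, setting only flags) plus setle rather than a cmpq against an immediate. A standalone sketch of that idiom (illustrative; the function name is invented):

; RUN: llc < %s -march=x86-64 | FileCheck %s

define i64 @is_nonpositive(i64 %x) nounwind {
; Signed x < 1 equals x <= 0, so no cmp immediate is needed.
; CHECK: testq %rdi, %rdi
; CHECK: setle %al
  %t = icmp slt i64 %x, 1
  %r = zext i1 %t to i64
  ret i64 %r
}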
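Similarly, the -O0 CHECK lines in object-size.ll (movq $-1, %rax) follow from the llvm.objectsize contract: when the size of the pointed-to object cannot be determined, the call folds to a sentinel, -1 for the maximum-size form (second argument 0) and 0 for the minimum-size form (second argument 1). A minimal sketch of an input that folds this way (illustrative only; the function name is invented):

declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly

define i64 @size_unknown(i8* %p) nounwind {
; %p points at an object of unknown size, so the max form folds to -1.
  %s = call i64 @llvm.objectsize.i64(i8* %p, i32 0)
  ret i64 %s
}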