Vendor import of llvm trunk r132879:

http://llvm.org/svn/llvm-project/llvm/trunk@132879
author: dim <dim@FreeBSD.org> 2011-06-12 15:42:51 +0000
committer: dim <dim@FreeBSD.org> 2011-06-12 15:42:51 +0000
commit: ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree: b3032e51d630e8070e9e08d6641648f195316a80 /test/CodeGen
parent: 2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
download: FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.zip
FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.tar.gz
169 files changed, 4086 insertions, 501 deletions
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 3909c6a..0a157c9 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -1,16 +1,16 @@
-; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; RUN: llc -O1 -march=arm -mattr=+vfp2 -mtriple=arm-linux-gnueabi < %s | FileCheck %s
 ; pr4939
 
 define void @test(double* %x, double* %y) nounwind {
-  %1 = load double* %x, align 4
-  %2 = load double* %y, align 4
+  %1 = load double* %x
+  %2 = load double* %y
   %3 = fsub double -0.000000e+00, %1
   %4 = fcmp ugt double %2, %3
   br i1 %4, label %bb1, label %bb2
 
 bb1:
 ;CHECK: vstrhi.64
-  store double %1, double* %y, align 4
+  store double %1, double* %y
   br label %bb2
 
 bb2:
diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll
index a61908f..19f756f 100644
--- a/test/CodeGen/ARM/2011-04-07-schediv.ll
+++ b/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -13,6 +13,7 @@ entry:
 ; Make sure the scheduler schedules all uses of the preincrement
 ; induction variable before defining the postincrement value.
 ; CHECK: t:
+; CHECK: %bb
 ; CHECK-NOT: mov
 bb:                                               ; preds = %entry, %bb
   %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
index a9dd971..568718c 100644
--- a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
+++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -14,15 +14,15 @@ for.cond:
   br i1 %cmp, label %for.body, label %return
 
 for.body:
-; CHECK: %for.body
-; CHECK: movs r{{[0-9]+}}, #1
+; CHECK: %for.
+; CHECK: movs r{{[0-9]+}}, #{{[01]}}
   %arrayidx = getelementptr i32* %A, i32 %0
   %tmp4 = load i32* %arrayidx, align 4
   %cmp6 = icmp eq i32 %tmp4, %value
   br i1 %cmp6, label %return, label %for.inc
 
-; CHECK: %for.cond
-; CHECK: movs r{{[0-9]+}}, #0
+; CHECK: %for.
+; CHECK: movs r{{[0-9]+}}, #{{[01]}}
 
 for.inc:
   %inc = add i32 %0, 1
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
new file mode 100644
index 0000000..0b5f962
--- /dev/null
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -verify-machineinstrs
+; <rdar://problem/9187612>
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin"
+
+define void @func() unnamed_addr align 2 {
+entry:
+  br label %for.cond
+
+for.cond:
+  %tmp2 = phi i32 [ 0, %entry ], [ %add, %for.cond.backedge ]
+  %cmp = icmp ult i32 %tmp2, 14
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %add = add i32 %tmp2, 1
+  switch i32 %tmp2, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 4, label %sw.bb
+    i32 5, label %sw.bb
+    i32 10, label %sw.bb
+  ]
+
+sw.bb:
+  invoke void @foo()
+          to label %invoke.cont17 unwind label %lpad
+
+invoke.cont17:
+  invoke void @foo()
+          to label %for.cond.backedge unwind label %lpad26
+
+for.cond.backedge:
+  br label %for.cond
+
+lpad:
+  %exn = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+lpad26:
+  %exn27 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+sw.default:
+  br label %for.cond.backedge
+
+for.end:
+  invoke void @foo()
+          to label %call8.i.i.i.noexc unwind label %lpad44
+
+call8.i.i.i.noexc:
+  ret void
+
+lpad44:
+  %exn45 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  invoke void @foo()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:
+  %exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+  tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
+  unreachable
+
+terminate.lpad:
+  %exn51 = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  tail call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @foo()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
+
+declare void @_ZSt9terminatev()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"bool", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
new file mode 100644
index 0000000..4db3acf
--- /dev/null
+++ b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 -arm-tail-calls=1 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__collate_st_chain_pri = type { [10 x i32], [2 x i32] }
+%struct.__collate_st_char_pri = type { [2 x i32] }
+%struct.__collate_st_info = type { [2 x i8], i8, i8, [2 x i32], [2 x i32], i32, i32 }
+%struct.__collate_st_large_char_pri = type { i32, %struct.__collate_st_char_pri }
+%struct.__collate_st_subst = type { i32, [10 x i32] }
+%struct.__xlocale_st_collate = type { i32, void (i8*)*, [32 x i8], %struct.__collate_st_info, [2 x %struct.__collate_st_subst*], %struct.__collate_st_chain_pri*, %struct.__collate_st_large_char_pri*, [256 x %struct.__collate_st_char_pri] }
+%struct.__xlocale_st_messages = type { i32, void (i8*)*, i8*, %struct.lc_messages_T }
+%struct.__xlocale_st_monetary = type { i32, void (i8*)*, i8*, %struct.lc_monetary_T }
+%struct.__xlocale_st_numeric = type { i32, void (i8*)*, i8*, %struct.lc_numeric_T }
+%struct.__xlocale_st_runelocale = type { i32, void (i8*)*, [32 x i8], i32, i32, i32 (i32*, i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (%union.__mbstate_t*, %struct._xlocale*)*, i32 (i32*, i8**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32, %struct._RuneLocale }
+%struct.__xlocale_st_time = type { i32, void (i8*)*, i8*, %struct.lc_time_T }
+%struct._xlocale = type { i32, void (i8*)*, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, i32, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, %struct.__xlocale_st_collate*, %struct.__xlocale_st_runelocale*, %struct.__xlocale_st_messages*, %struct.__xlocale_st_monetary*, %struct.__xlocale_st_numeric*, %struct._xlocale*, %struct.__xlocale_st_time*, %struct.lconv }
+%struct.lc_messages_T = type { i8*, i8*, i8*, i8* }
+%struct.lc_monetary_T = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
+%struct.lc_numeric_T = type { i8*, i8*, i8* }
+%struct.lc_time_T = type { [12 x i8*], [12 x i8*], [7 x i8*], [7 x i8*], i8*, i8*, i8*, i8*, i8*, i8*, [12 x i8*], i8*, i8* }
+%struct.lconv = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+%union.__mbstate_t = type { i64, [120 x i8] }
+
+@"\01_fnmatch.initial" = external constant %union.__mbstate_t, align 4
+
+; CHECK: _fnmatch
+; CHECK: blx _fnmatch1
+
+define i32 @"\01_fnmatch"(i8* %pattern, i8* %string, i32 %flags) nounwind optsize {
+entry:
+  %call4 = tail call i32 @fnmatch1(i8* %pattern, i8* %string, i8* %string, i32 %flags, %union.__mbstate_t* byval @"\01_fnmatch.initial", %union.__mbstate_t* byval @"\01_fnmatch.initial", %struct._xlocale* undef, i32 64) optsize
+  ret i32 %call4
+}
+
+declare i32 @fnmatch1(i8*, i8*, i8*, i32, %union.__mbstate_t* byval, %union.__mbstate_t* byval, %struct._xlocale*, i32) nounwind optsize
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
new file mode 100644
index 0000000..0a7bb6c
--- /dev/null
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define i32 @foo(float %scale, float %scale2) nounwind {
+entry:
+  %scale.addr = alloca float, align 4
+  %scale2.addr = alloca float, align 4
+  store float %scale, float* %scale.addr, align 4
+  store float %scale2, float* %scale2.addr, align 4
+  %tmp = load float* %scale.addr, align 4
+  %tmp1 = load float* %scale2.addr, align 4
+  call void asm sideeffect "vmul.f32    q0, q0, ${0:y} \0A\09vmul.f32    q1, q1, ${0:y} \0A\09vmul.f32    q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind
+  ret i32 0
+}
+
+define void @f0() nounwind {
+entry:
+; CHECK: f0
+; CHECK: .word -1
+call void asm sideeffect ".word ${0:B} \0A\09", "i"(i32 0) nounwind
+ret void
+}
+
+define void @f1() nounwind {
+entry:
+; CHECK: f1
+; CHECK: .word 65535
+call void asm sideeffect ".word ${0:L} \0A\09", "i"(i32 -1) nounwind
+ret void
+}
+
+@f2_ptr = internal global i32* @f2_var, align 4
+@f2_var = external global i32
+
+define void @f2() nounwind {
+entry:
+; CHECK: f2
+; CHECK: ldr r0, [r{{[0-9]+}}]
+call void asm sideeffect "ldr r0, [${0:m}]\0A\09", "*m,~{r0}"(i32** @f2_ptr) nounwind
+ret void
+}
+
+@f3_ptr = internal global i64* @f3_var, align 4
+@f3_var = external global i64
+@f3_var2 = external global i64
+
+define void @f3() nounwind {
+entry:
+; CHECK: f3
+; CHECK: stm r{{[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
+; CHECK: adds lr, [[REG1]]
+; CHECK: ldm r{{[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
+%tmp = load i64* @f3_var, align 4
+%tmp1 = load i64* @f3_var2, align 4
+%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind
+store i64 %0, i64* @f3_var, align 4
+%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind
+store i64 %1, i64* @f3_var, align 4
+ret void
+}
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
new file mode 100644
index 0000000..03940e3
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+define void @func(i32 %argc, i8** %argv) nounwind {
+entry:
+	%argc.addr = alloca i32		; <i32*> [#uses=1]
+	%argv.addr = alloca i8**		; <i8***> [#uses=1]
+	%val1 = alloca i32		; <i32*> [#uses=2]
+	%val2 = alloca i32		; <i32*> [#uses=15]
+	%andt = alloca i32		; <i32*> [#uses=2]
+	%ort = alloca i32		; <i32*> [#uses=2]
+	%xort = alloca i32		; <i32*> [#uses=2]
+	%old = alloca i32		; <i32*> [#uses=18]
+	%temp = alloca i32		; <i32*> [#uses=2]
+	store i32 %argc, i32* %argc.addr
+	store i8** %argv, i8*** %argv.addr
+	store i32 0, i32* %val1
+	store i32 31, i32* %val2
+	store i32 3855, i32* %andt
+	store i32 3855, i32* %ort
+	store i32 3855, i32* %xort
+	store i32 4, i32* %temp
+	%tmp = load i32* %temp
+  ; CHECK: ldrex
+  ; CHECK: add
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp )		; <i32>:0 [#uses=1]
+	store i32 %0, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: sub
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 )		; <i32>:1 [#uses=1]
+	store i32 %1, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: add
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 )		; <i32>:2 [#uses=1]
+	store i32 %2, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: sub
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 )		; <i32>:3 [#uses=1]
+	store i32 %3, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: and
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 )		; <i32>:4 [#uses=1]
+	store i32 %4, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: or
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 )		; <i32>:5 [#uses=1]
+	store i32 %5, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: eor
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 )		; <i32>:6 [#uses=1]
+	store i32 %6, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* %old
+	%neg = sub i32 0, 1		; <i32> [#uses=1]
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg )		; <i32>:8 [#uses=1]
+	store i32 %8, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 )		; <i32>:10 [#uses=1]
+	store i32 %10, i32* %old
+	ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.min.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.max.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umax.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umin.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind 
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 946db19..c94b096 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -31,8 +31,7 @@ define i32 @f3(i32 %A, i32 %B) nounwind {
 entry:
 ; CHECK: f3
 ; CHECK: lsr{{.*}} #7
-; CHECK: mov r0, r1
-; CHECK: bfi r0, r2, #7, #16
+; CHECK: bfi {{.*}}, #7, #16
   %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
   %and2 = and i32 %B, -8388481                    ; <i32> [#uses=1]
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
@@ -42,8 +41,8 @@ entry:
 ; rdar://8752056
 define i32 @f4(i32 %a) nounwind {
 ; CHECK: f4
-; CHECK: movw r1, #3137
-; CHECK: bfi r1, r0, #15, #5
+; CHECK: movw [[R1:r[0-9]+]], #3137
+; CHECK: bfi [[R1]], {{r[0-9]+}}, #15, #5
   %1 = shl i32 %a, 15
   %ins7 = and i32 %1, 1015808
   %ins12 = or i32 %ins7, 3137
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 4dc37aa..c460f7a 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
 
@@ -16,6 +16,10 @@ define void @t1() {
 define void @t2() {
 ; CHECKV6: t2:
 ; CHECKV6: bx r0 @ TAILCALL
+; CHECKT2D: t2:
+; CHECKT2D: ldr
+; CHECKT2D-NEXT: ldr
+; CHECKT2D-NEXT: bx r0 @ TAILCALL
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
@@ -26,6 +30,9 @@ define void @t3() {
 ; CHECKV6: b _t2  @ TAILCALL
 ; CHECKELF: t3:
 ; CHECKELF: b t2(PLT) @ TAILCALL
+; CHECKT2D: t3:
+; CHECKT2D: b.w _t2  @ TAILCALL
+
         tail call void @t2( )            ; <i32> [#uses=0]
         ret void
 }
@@ -71,10 +78,10 @@ declare void @foo() nounwind
 
 define void @t7() nounwind {
 entry:
-; CHECKT2: t7:
-; CHECKT2: blxeq _foo
-; CHECKT2-NEXT: pop.w
-; CHECKT2-NEXT: b _foo
+; CHECKT2D: t7:
+; CHECKT2D: blxeq _foo
+; CHECKT2D-NEXT: pop.w
+; CHECKT2D-NEXT: b.w _foo
   br i1 undef, label %bb, label %bb1.lr.ph
 
 bb1.lr.ph:
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
new file mode 100644
index 0000000..9bdae43
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+;CHECK: 	vadd.f32	q4, q8, q8
+;CHECK-NEXT: Ltmp
+;CHECK-NEXT: 	@DEBUG_VALUE: y <- Q4+0
+;CHECK-NEXT:    @DEBUG_VALUE: x <- Q4+0
+
+
+@.str = external constant [13 x i8]
+
+declare <4 x float> @test0001(float) nounwind readnone ssp
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9, %entry
+  %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
+  tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
+  %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
+  tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
+  br i1 undef, label %for.end54, label %for.body9, !dbg !44
+
+for.end54:                                        ; preds = %for.body9
+  %tmp115 = extractelement <4 x float> %add19, i32 1
+  %conv6.i75 = fpext float %tmp115 to double, !dbg !45
+  %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+  %tmp116 = extractelement <4 x float> %add20, i32 1
+  %conv6.i76 = fpext float %tmp116 to double, !dbg !45
+  %call.i83 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
+  ret i32 0, !dbg !49
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !10, !14}
+!llvm.dbg.lv.test0001 = !{!18}
+!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29}
+!llvm.dbg.lv.printFV = !{!30}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{null}
+!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!34 = metadata !{metadata !35, metadata !37}
+!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = metadata !{i32 79, i32 7, metadata !40, null}
+!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!44 = metadata !{i32 75, i32 5, metadata !42, null}
+!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
+!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!48 = metadata !{i32 95, i32 3, metadata !25, null}
+!49 = metadata !{i32 99, i32 3, metadata !25, null}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
new file mode 100644
index 0000000..16aeab3
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s - | FileCheck %s
+; Radar 9376013
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp1
+;CHECK-NEXT:        .long   Ltmp3
+;CHECK-NEXT: Lset9 = Ltmp10-Ltmp9                    @ Loc expr size
+;CHECK-NEXT:        .short  Lset9
+;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
+
+define void @_Z3foov() optsize ssp {
+entry:
+  %call = tail call float @_Z3barv() optsize, !dbg !11
+  tail call void @llvm.dbg.value(metadata !{float %call}, i64 0, metadata !5), !dbg !11
+  %call16 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp7 = fcmp olt float %call, %call16, !dbg !12
+  br i1 %cmp7, label %for.body, label %for.end, !dbg !12
+
+for.body:                                         ; preds = %entry, %for.body
+  %k.08 = phi float [ %inc, %for.body ], [ %call, %entry ]
+  %call4 = tail call float @_Z2f3f(float %k.08) optsize, !dbg !13
+  %inc = fadd float %k.08, 1.000000e+00, !dbg !14
+  %call1 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp = fcmp olt float %inc, %call1, !dbg !12
+  br i1 %cmp, label %for.body, label %for.end, !dbg !12
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !15
+}
+
+declare float @_Z3barv() optsize
+
+declare float @_Z2f2v() optsize
+
+declare float @_Z2f3f(float) optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1}
+!llvm.dbg.lv._Z3foov = !{!5, !8}
+
+!0 = metadata !{i32 589841, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 590080, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 589835, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 590080, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 589835, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 589835, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 6, i32 18, metadata !6, null}
+!12 = metadata !{i32 7, i32 3, metadata !6, null}
+!13 = metadata !{i32 8, i32 20, metadata !9, null}
+!14 = metadata !{i32 7, i32 20, metadata !10, null}
+!15 = metadata !{i32 10, i32 1, metadata !6, null}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
deleted file mode 100644
index 34313aa..0000000
--- a/test/CodeGen/ARM/divmod.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
-
-define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
-entry:
-; CHECK: foo:
-; CHECK: bl ___divmodsi4
-; CHECK-NOT: bl ___divmodsi4
-  %div = sdiv i32 %x, %y
-  store i32 %div, i32* %P, align 4
-  %rem = srem i32 %x, %y
-  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
-  store i32 %rem, i32* %arrayidx6, align 4
-  ret void
-}
-
-define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
-entry:
-; CHECK: bar:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: bl ___udivmodsi4
-  %div = udiv i32 %x, %y
-  store i32 %div, i32* %P, align 4
-  %rem = urem i32 %x, %y
-  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
-  store i32 %rem, i32* %arrayidx6, align 4
-  ret void
-}
-
-; rdar://9280991
-@flags = external unnamed_addr global i32
-@tabsize = external unnamed_addr global i32
-
-define void @do_indent(i32 %cols) nounwind {
-entry:
-; CHECK: do_indent:
-  %0 = load i32* @flags, align 4
-  %1 = and i32 %0, 67108864
-  %2 = icmp eq i32 %1, 0
-  br i1 %2, label %bb1, label %bb
-
-bb:
-; CHECK: bl ___divmodsi4
-  %3 = load i32* @tabsize, align 4
-  %4 = srem i32 %cols, %3
-  %5 = sdiv i32 %cols, %3
-  %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false)
-  %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind
-  br label %bb1
-
-bb1:
-  %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ]
-  %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0
-  store i8 0, i8* %8, align 1
-  ret void
-}
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
-declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
new file mode 100644
index 0000000..e475508
--- /dev/null
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+target triple = "armv6-apple-macosx10.6"
+
+declare void @func()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @llvm.eh.resume(i8*, i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+define void @test0() {
+entry:
+  invoke void @func()
+    to label %cont unwind label %lpad
+
+cont:
+  ret void
+
+lpad:
+  %exn = call i8* @llvm.eh.exception()
+  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+  call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn
+  unreachable
+}
+
+; CHECK: __Unwind_SjLj_Resume
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
new file mode 100644
index 0000000..aa06299
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; rdar://9515076
+; (Make sure this doesn't crash.)
+
+define i32 @test(i32 %i) {
+  %t = trunc i32 %i to i4
+  %r = sext i4 %t to i32
+  ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 6aad92f..499c97f 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -61,3 +61,100 @@ b3:
 ; THUMB: orr {{.*}} #4
 ; ARM: orr {{.*}} #4
 }
+
+define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind {
+; THUMB: test3:
+; ARM: test3:
+
+bb1:
+  %a1 = trunc i32 %tmp to i16
+  %a2 = trunc i16 %a1 to i8
+  %a3 = trunc i8 %a2 to i1
+  %a4 = zext i1 %a3 to i8
+  store i8 %a4, i8* %ptr3
+  %a5 = zext i8 %a4 to i16
+  store i16 %a5, i16* %ptr2
+  %a6 = zext i16 %a5 to i32
+  store i32 %a6, i32* %ptr1
+  br label %bb2
+
+; THUMB: and
+; THUMB: strb
+; THUMB: uxtb
+; THUMB: strh
+; THUMB: uxth
+; ARM: and
+; ARM: strb
+; ARM: uxtb
+; ARM: strh
+; ARM: uxth
+
+bb2:
+  %b1 = trunc i32 %tmp to i16
+  %b2 = trunc i16 %b1 to i8
+  store i8 %b2, i8* %ptr3
+  %b3 = sext i8 %b2 to i16
+  store i16 %b3, i16* %ptr2
+  %b4 = sext i16 %b3 to i32
+  store i32 %b4, i32* %ptr1
+  br label %bb3
+
+; THUMB: strb
+; THUMB: sxtb
+; THUMB: strh
+; THUMB: sxth
+; ARM: strb
+; ARM: sxtb
+; ARM: strh
+; ARM: sxth
+
+bb3:
+  %c1 = load i8* %ptr3
+  %c2 = load i16* %ptr2
+  %c3 = load i32* %ptr1
+  %c4 = zext i8 %c1 to i32
+  %c5 = sext i16 %c2 to i32
+  %c6 = add i32 %c4, %c5
+  %c7 = sub i32 %c3, %c6
+  store i32 %c7, i32* %ptr1
+  ret void
+
+; THUMB: ldrb
+; THUMB: ldrh
+; THUMB: uxtb
+; THUMB: sxth
+; THUMB: add
+; THUMB: sub
+; ARM: ldrb
+; ARM: ldrh
+; ARM: uxtb
+; ARM: sxth
+; ARM: add
+; ARM: sub
+}
+
+; Check loads/stores with globals
+@test4g = external global i32
+
+define void @test4() {
+  %a = load i32* @test4g
+  %b = add i32 %a, 1
+  store i32 %b, i32* @test4g
+  ret void
+
+; THUMB: ldr.n r0, LCPI4_1
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r0, [r0]
+; THUMB: adds r0, #1
+; THUMB: ldr.n r1, LCPI4_0
+; THUMB: ldr r1, [r1]
+; THUMB: str r0, [r1]
+
+; ARM: ldr r0, LCPI4_1
+; ARM: ldr r0, [r0]
+; ARM: ldr r0, [r0]
+; ARM: add r0, r0, #1
+; ARM: ldr r1, LCPI4_0
+; ARM: ldr r1, [r1]
+; ARM: str r0, [r1]
+}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index f241c26..c4dbeb9 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -10,7 +10,7 @@ entry:
 
 ; HARD: test1:
 ; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
-; HARD: vbsl [[REG1]], d2, d0
+; HARD: vbsl [[REG1]], d
   %0 = tail call float @copysignf(float %x, float %y) nounwind
   ret float %0
 }
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 9facf20..6081712 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -29,7 +29,7 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t2:
-; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}}
+; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
 ; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 9d6eba8..58687b9 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -23,3 +23,38 @@ entry:
   %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind
   ret void
 }
+
+; Radar 9306086
+
+%0 = type { <8 x i8>, <16 x i8>* }
+
+define hidden void @conv4_8_E() nounwind {
+entry:
+%asmtmp31 = call %0 asm "vld1.u8  {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
+unreachable
+}
+
+; Radar 9037836 & 9119939
+
+define i32 @t3() nounwind {
+entry:
+tail call void asm sideeffect "flds s15, $0 \0A", "^Uv|m,~{s15}"(float 1.000000e+00) nounwind
+ret i32 0
+}
+
+; Radar 9037836 & 9119939
+
+@k.2126 = internal unnamed_addr global float 1.000000e+00
+define i32 @t4() nounwind {
+entry:
+call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"(float* @k.2126) nounwind
+ret i32 0
+}
+
+; Radar 9037836 & 9119939
+
+define i32 @t5() nounwind {
+entry:
+call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind
+ret i32 0
+}
diff --git a/test/CodeGen/ARM/intrinsics.ll b/test/CodeGen/ARM/intrinsics.ll
new file mode 100644
index 0000000..54cc3e0
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=thumb -mtriple=thumbv7-eabi -mcpu=cortex-a8 | FileCheck %s
+
+define void @coproc() nounwind {
+entry:
+  ; CHECK: mrc
+  %0 = tail call i32 @llvm.arm.mrc(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcr
+  tail call void @llvm.arm.mcr(i32 7, i32 1, i32 %0, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mrc2
+  %1 = tail call i32 @llvm.arm.mrc2(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcr2
+  tail call void @llvm.arm.mcr2(i32 7, i32 1, i32 %1, i32 1, i32 1, i32 4) nounwind
+  ; CHECK: mcrr
+  tail call void @llvm.arm.mcrr(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind
+  ; CHECK: mcrr2
+  tail call void @llvm.arm.mcrr2(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind
+  ; CHECK: cdp
+  tail call void @llvm.arm.cdp(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind
+  ; CHECK: cdp2
+  tail call void @llvm.arm.cdp2(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind
+  ret void
+}
+
+declare void @llvm.arm.cdp2(i32, i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.cdp(i32, i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcrr2(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcrr(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcr2(i32, i32, i32, i32, i32, i32) nounwind
+
+declare i32 @llvm.arm.mrc2(i32, i32, i32, i32, i32) nounwind
+
+declare void @llvm.arm.mcr(i32, i32, i32, i32, i32, i32) nounwind
+
+declare i32 @llvm.arm.mrc(i32, i32, i32, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
index 2d016f6..1c69e15 100644
--- a/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -10,8 +10,8 @@ entry:
   br i1 %0, label %return, label %bb
 
 bb:
-; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
-; CHECK: str [[REGISTER]], [r2], #4
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [{{r[0-9]+}}], {{r[0-9]+}}
+; CHECK: str [[REGISTER]], [{{r[0-9]+}}], #4
   %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
   %tmp = mul i32 %j.05, %index
   %uglygep = getelementptr i8* %src6, i32 %tmp
diff --git a/test/CodeGen/ARM/ldstrexd.ll b/test/CodeGen/ARM/ldstrexd.ll
new file mode 100644
index 0000000..0c0911a
--- /dev/null
+++ b/test/CodeGen/ARM/ldstrexd.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+%0 = type { i32, i32 }
+
+; CHECK: f0:
+; CHECK: ldrexd
+define i64 @f0(i8* %p) nounwind readonly {
+entry:
+  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
+  %0 = extractvalue %0 %ldrexd, 1
+  %1 = extractvalue %0 %ldrexd, 0
+  %2 = zext i32 %0 to i64
+  %3 = zext i32 %1 to i64
+  %shl = shl nuw i64 %2, 32
+  %4 = or i64 %shl, %3
+  ret i64 %4
+}
+
+; CHECK: f1:
+; CHECK: strexd
+define i32 @f1(i8* %ptr, i64 %val) nounwind {
+entry:
+  %tmp4 = trunc i64 %val to i32
+  %tmp6 = lshr i64 %val, 32
+  %tmp7 = trunc i64 %tmp6 to i32
+  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
+  ret i32 %strexd
+}
+
+declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
+declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
+
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
new file mode 100644
index 0000000..e3e6eae
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -0,0 +1,80 @@
+; RUN: llc -regalloc=greedy < %s | FileCheck %s
+
+; LSR shouldn't introduce more induction variables than needed, increasing
+; register pressure and therefore spilling. There is more room for improvement
+; here.
+
+; CHECK: sub sp, #{{32|24}}
+
+; CHECK:      ldr r{{.*}}, [sp, #4]
+; CHECK-NEXT: ldr r{{.*}}, [sp, #16]
+; CHECK-NEXT: ldr r{{.*}}, [sp, #12]
+; CHECK-NEXT: adds
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.7.0"
+
+%struct.partition_entry = type { i32, i32, i64, i64 }
+
+define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp {
+entry:
+  %cmp79 = icmp sgt i32 %num_entries, 0
+  br i1 %cmp79, label %outer.loop, label %for.end72
+
+outer.loop:                                 ; preds = %for.inc69, %entry
+  %overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]
+  %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
+  %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
+  %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
+  %tmp5 = load i64* %offset, align 4, !tbaa !0
+  %tmp15 = load i64* %len, align 4, !tbaa !0
+  %add = add nsw i64 %tmp15, %tmp5
+  br label %inner.loop
+
+inner.loop:                                       ; preds = %for.inc, %outer.loop
+  %overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ]
+  %1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ]
+  %cmp23 = icmp eq i32 %0, %1
+  br i1 %cmp23, label %for.inc, label %if.end
+
+if.end:                                           ; preds = %inner.loop
+  %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
+  %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
+  %tmp29 = load i64* %offset28, align 4, !tbaa !0
+  %tmp40 = load i64* %len39, align 4, !tbaa !0
+  %add41 = add nsw i64 %tmp40, %tmp29
+  %cmp44 = icmp sge i64 %tmp29, %tmp5
+  %cmp47 = icmp slt i64 %tmp29, %add
+  %or.cond = and i1 %cmp44, %cmp47
+  %overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178
+  %cmp52 = icmp sle i64 %add41, %add
+  %cmp56 = icmp sgt i64 %add41, %tmp5
+  %or.cond74 = and i1 %cmp52, %cmp56
+  %overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2
+  %cmp61 = icmp sgt i64 %tmp29, %tmp5
+  %cmp65 = icmp slt i64 %add41, %add
+  %or.cond75 = or i1 %cmp61, %cmp65
+  br i1 %or.cond75, label %for.inc, label %if.then66
+
+if.then66:                                        ; preds = %if.end
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end, %if.then66, %inner.loop
+  %overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ]
+  %inc = add nsw i32 %1, 1
+  %exitcond = icmp eq i32 %inc, %num_entries
+  br i1 %exitcond, label %for.inc69, label %inner.loop
+
+for.inc69:                                        ; preds = %for.inc
+  %inc71 = add nsw i32 %0, 1
+  %exitcond83 = icmp eq i32 %inc71, %num_entries
+  br i1 %exitcond83, label %for.end72, label %outer.loop
+
+for.end72:                                        ; preds = %for.inc69, %entry
+  %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
+  ret i32 %overlap.0.lcssa
+}
+
+!0 = metadata !{metadata !"long long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 41d5944..032129d 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,10 +1,26 @@
-; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -o - | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
+
+@from = common global [500 x i32] zeroinitializer, align 4
+@to = common global [500 x i32] zeroinitializer, align 4
 
 define void @f() {
 entry:
-        call void @llvm.memmove.i32( i8* null, i8* null, i32 64, i32 0 )
-        call void @llvm.memcpy.i32( i8* null, i8* null, i32 64, i32 0 )
-        call void @llvm.memset.i32( i8* null, i8 64, i32 0, i32 0 )
+
+        ; CHECK: memmove
+        ; EABI: __aeabi_memmove
+        call void @llvm.memmove.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+
+        ; CHECK: memcpy
+        ; EABI: __aeabi_memcpy
+        call void @llvm.memcpy.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+
+        ; EABI memset swaps arguments
+        ; CHECK: mov r1, #0
+        ; CHECK: memset
+        ; EABI: mov r2, #0
+        ; EABI: __aeabi_memset
+        call void @llvm.memset.i32( i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0 )
         unreachable
 }
 
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index 886ff3f..991d728 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -1,20 +1,39 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "armv7-eabi"
+; RUN: llc < %s -mtriple=armv7-eabi      | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic            | FileCheck %s -check-prefix=IOS-PIC
+; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static         | FileCheck %s -check-prefix=IOS-STATIC
 
-@foo = common global i32 0                        ; <i32*> [#uses=1]
+@foo = common global i32 0
 
-define arm_aapcs_vfpcc i32* @bar1() nounwind readnone {
+define i32* @bar1() nounwind readnone {
 entry:
-; CHECK:      movw    r0, :lower16:foo
-; CHECK-NEXT: movt    r0, :upper16:foo
+; EABI:      movw    r0, :lower16:foo
+; EABI-NEXT: movt    r0, :upper16:foo
+
+; IOS:      movw    r0, :lower16:L_foo$non_lazy_ptr
+; IOS-NEXT: movt    r0, :upper16:L_foo$non_lazy_ptr
+
+; IOS-PIC:      movw    r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8))
+; IOS-PIC-NEXT: movt    r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8))
+
+; IOS-STATIC-NOT:      movw    r0, :lower16:_foo
+; IOS-STATIC-NOT:       movt    r0, :upper16:_foo
   ret i32* @foo
 }
 
-define arm_aapcs_vfpcc void @bar2(i32 %baz) nounwind {
+define void @bar2(i32 %baz) nounwind {
 entry:
-; CHECK:      movw    r1, :lower16:foo
-; CHECK-NEXT: movt    r1, :upper16:foo
+; EABI:      movw    r1, :lower16:foo
+; EABI-NEXT: movt    r1, :upper16:foo
+
+; IOS:      movw    r1, :lower16:L_foo$non_lazy_ptr
+; IOS-NEXT: movt    r1, :upper16:L_foo$non_lazy_ptr
+
+; IOS-PIC:      movw    r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8))
+; IOS-PIC-NEXT: movt    r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8))
+
+; IOS-STATIC-NOT:      movw    r1, :lower16:_foo
+; IOS-STATIC-NOT:      movt    r1, :upper16:_foo
   store i32 %baz, i32* @foo, align 4
   ret void
 }
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 82ed018..43f8a66 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -5,8 +5,8 @@
 define i32 @t1(i32 %c) nounwind readnone {
 entry:
 ; ARM: t1:
-; ARM: mov r1, #101
-; ARM: orr r1, r1, #1, #24
+; ARM: mov [[R1:r[0-9]+]], #101
+; ARM: orr [[R1b:r[0-9]+]], [[R1]], #1, #24
 ; ARM: movgt r0, #123
 
 ; ARMT2: t1:
@@ -34,7 +34,7 @@ entry:
 ; ARMT2: movwgt r0, #357
 
 ; THUMB2: t2:
-; THUMB2: mov.w r0, #123
+; THUMB2: mov{{(s|\.w)}} r0, #123
 ; THUMB2: movwgt r0, #357
 
   %0 = icmp sgt i32 %c, 1
@@ -53,7 +53,7 @@ entry:
 ; ARMT2: moveq r0, #1
 
 ; THUMB2: t3:
-; THUMB2: mov.w r0, #0
+; THUMB2: mov{{(s|\.w)}} r0, #0
 ; THUMB2: moveq r0, #1
   %0 = icmp eq i32 %a, 160
   %1 = zext i1 %0 to i32
@@ -67,11 +67,11 @@ entry:
 ; ARM: movlt
 
 ; ARMT2: t4:
-; ARMT2: movwlt r0, #65365
-; ARMT2: movtlt r0, #65365
+; ARMT2: movwlt [[R0:r[0-9]+]], #65365
+; ARMT2: movtlt [[R0]], #65365
 
 ; THUMB2: t4:
-; THUMB2: mvnlt.w r0, #11141290
+; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290
   %0 = icmp slt i32 %a, %b
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 2f5fadb..82dc14d 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: push
-; CHECK: stmib
+; CHECK: stm
 	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind		; <i32> [#uses=0]
 	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 805aad5..0d7d4ec 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -125,7 +125,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;Check for a post-increment updating load.
 define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 ;CHECK: vld2lanei32_update:
-;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
+;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -153,7 +153,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld2laneQi16:
 ;Check the (default) alignment.
-;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
+;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
@@ -166,7 +166,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
+;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -222,7 +222,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld3lanei16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
-;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -265,7 +265,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld3laneQi16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
@@ -280,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK: vld3laneQi16_update:
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
@@ -344,7 +344,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32]
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
@@ -360,7 +360,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;Check for a post-increment updating load.
 define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8_update:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -380,7 +380,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld4lanei16:
 ;Check that a power-of-two alignment smaller than the total size of the memory
 ;being loaded is ignored.
-;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
+;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
@@ -398,7 +398,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld4lanei32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
@@ -431,7 +431,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
@@ -448,7 +448,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld4laneQi32:
 ;Check the (default) alignment.
-;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index f0f9e4e..34acd16 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -147,3 +147,34 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
   store <4 x float> %tmp8, <4 x float>* %v, align 16
   ret void
 }
+
+; vrev <4 x i16> should use VREV32 and not VREV64
+define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
+; CHECK: test_vrev64:
+; CHECK: vext.16
+; CHECK: vrev32.16
+entry:
+  %0 = bitcast <4 x i16>* %source to <8 x i16>*
+  %tmp2 = load <8 x i16>* %0, align 4
+  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
+  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
+  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
+  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
+  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
+  ret void
+}
+
+; Test vrev of float4
+define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
+; CHECK: float_vrev64
+; CHECK: vext.32
+; CHECK: vrev64.32
+entry:
+  %0 = bitcast float* %source to <4 x float>*
+  %tmp2 = load <4 x float>* %0, align 4
+  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
+  %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
+  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index d1bc15a..08b7232 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -54,7 +54,8 @@ define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst1laneQi8:
-;CHECK: vst1.8 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.8 {d17[1]}, [r0]
 	%tmp1 = load <16 x i8>* %B
         %tmp2 = extractelement <16 x i8> %tmp1, i32 9
         store i8 %tmp2, i8* %A, align 8
@@ -72,7 +73,8 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32:
-;CHECK: vst1.32 {d17[1]}, [r0, :32]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0, :32]
 	%tmp1 = load <4 x i32>* %B
         %tmp2 = extractelement <4 x i32> %tmp1, i32 3
         store i32 %tmp2, i32* %A, align 8
@@ -82,7 +84,8 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;Check for a post-increment updating store.
 define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32_update:
-;CHECK: vst1.32 {d17[1]}, [r1, :32]!
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]!
 	%A = load i32** %ptr
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = extractelement <4 x i32> %tmp1, i32 3
@@ -94,7 +97,8 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 
 define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vst1laneQf:
-;CHECK: vst1.32 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0]
 	%tmp1 = load <4 x float>* %B
         %tmp2 = extractelement <4 x float> %tmp1, i32 3
         store float %tmp2, float* %A
diff --git a/test/CodeGen/Generic/promote-integers.ll b/test/CodeGen/Generic/promote-integers.ll
new file mode 100644
index 0000000..5812592
--- /dev/null
+++ b/test/CodeGen/Generic/promote-integers.ll
@@ -0,0 +1,15 @@
+; Test that vectors are scalarized/lowered correctly.
+; RUN: llc -march=x86 -promote-elements < %s | FileCheck %s
+
+; This test is the poster-child for integer-element-promotion.
+; Until this feature is complete, we mark this test as expected to fail.
+; XFAIL: *
+; CHECK: vector_code
+; CHECK: ret
+define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 )  {
+   %C = icmp eq <4 x i64> %A, %B
+   %K = xor <4 x i1> <i1 1, i1 1, i1 1, i1 1>, %C
+   %D = select <4 x i1> %K, <4 x float> %R1, <4 x float> %R0
+   ret <4 x float> %D
+}
+
diff --git a/test/CodeGen/Generic/zero-sized-array.ll b/test/CodeGen/Generic/zero-sized-array.ll
new file mode 100644
index 0000000..280ba00
--- /dev/null
+++ b/test/CodeGen/Generic/zero-sized-array.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s
+; PR9900
+
+%zero = type [0 x i8]
+%foobar = type { i32, %zero }
+
+define void @f(%foobar %arg) {
+  %arg1 = extractvalue %foobar %arg, 0
+  %arg2 = extractvalue %foobar %arg, 1
+  call i32 @f2(%zero %arg2, i32 5, i32 42)
+  ret void
+}
+
+define i32 @f2(%zero %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @f3(%zero %x, i32 %y) {
+  call i32 @f2(%zero %x, i32 5, i32 %y)
+  ret void
+}
+
+define void @f4(%zero %z) {
+  insertvalue %foobar undef, %zero %z, 1
+  ret void
+}
+
+define void @f5(%foobar %x) {
+allocas:
+  %y = extractvalue %foobar %x, 1
+  br  label %b1
+
+b1:
+  %insert120 = insertvalue %foobar undef, %zero %y, 1
+  ret void
+}
+
+define void @f6(%zero %x, %zero %y) {
+b1:
+  br i1 undef, label %end, label %b2
+
+b2:
+  br label %end
+
+end:
+  %z = phi %zero [ %y, %b1 ], [ %x, %b2 ]
+  call void @f4(%zero %z)
+  ret void
+}
+
+%zero2 = type {}
+
+define i32 @g1(%zero2 %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @g2(%zero2 %x, i32 %y) {
+  call i32 @g1(%zero2 %x, i32 5, i32 %y)
+  ret void
+}
+
+%zero2r = type {%zero2}
+
+define i32 @h1(%zero2r %x, i32 %y, i32 %z) {
+  ret i32 %y
+}
+
+define void @h2(%zero2r %x, i32 %y) {
+  call i32 @h1(%zero2r %x, i32 5, i32 %y)
+  ret void
+}
+
+%foobar2 = type { i32, %zero2r }
+
+define void @h3(%foobar2 %arg) {
+  %arg1 = extractvalue %foobar2 %arg, 0
+  %arg2 = extractvalue %foobar2 %arg, 1
+  %arg21 = extractvalue %zero2r %arg2, 0
+  call void @g2(%zero2 %arg21, i32 5)
+  ret void
+}
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 0f5fc12..45342e2 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -10,7 +10,7 @@ define i8 @mov(i8 %a, i8 %b) nounwind {
 
 define i8 @add(i8 %a, i8 %b) nounwind {
 ; CHECK: add:
-; CHECK: add.b	r12, r15
+; CHECK: add.b
 	%1 = add i8 %a, %b
 	ret i8 %1
 }
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index 47382f9..f152acc 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -2,6 +2,10 @@
 ; RUN: grep abs.s  %t | count 1
 ; RUN: grep neg.s %t | count 1
 
+; FIXME: Should not emit abs.s or neg.s since these instructions produce
+;        incorrect results if the operand is NaN.
+; REQUIRES: disabled
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
 
diff --git a/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll b/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll
new file mode 100644
index 0000000..1255949
--- /dev/null
+++ b/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -verify-coalescing
+; PR10046
+;
+; PHI elimination splits the critical edge from %while.end415 to %if.end427.
+; This requires updating the BNE-J terminators to a BEQ. The BNE instruction
+; kills a virtual register, and LiveVariables must be updated with the new kill
+; instruction.
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-ellcc-linux"
+
+define i32 @mergesort(i8* %base, i32 %nmemb, i32 %size, i32 (i8*, i8*)* nocapture %cmp) nounwind {  ; reduced CFG; all branch conditions are undef
+entry:
+  br i1 undef, label %return, label %if.end13
+
+if.end13:                                         ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %if.end13
+  %list1.0482 = phi i8* [ %base, %if.end13 ], [ null, %while.body ]  ; used later in %if.then419 and %if.end427
+  br i1 undef, label %while.end415, label %while.body
+
+while.end415:                                     ; preds = %while.body
+  br i1 undef, label %if.then419, label %if.end427  ; the edge to %if.end427 is the critical edge named in the file header
+
+if.then419:                                       ; preds = %while.end415
+  %call425 = tail call i8* @memmove(i8* %list1.0482, i8* undef, i32 undef) nounwind  ; result is unused
+  br label %if.end427
+
+if.end427:                                        ; preds = %if.then419, %while.end415
+  %list2.1 = phi i8* [ undef, %if.then419 ], [ %list1.0482, %while.end415 ]  ; %list1.0482 arrives only over the critical edge
+  tail call void @free(i8* %list2.1)
+  unreachable
+
+return:                                           ; preds = %entry
+  ret i32 -1
+}
+
+
+declare i8* @memmove(i8*, i8*, i32)
+
+declare void @free(i8*)
+
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
new file mode 100644
index 0000000..50eeecf
--- /dev/null
+++ b/test/CodeGen/Mips/alloca.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+define i32 @twoalloca(i32 %size) nounwind {  ; two dynamically sized allocas, each later passed to @foo
+entry:
+; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: addu  $sp, $zero, $[[T0]]
+; CHECK: addu  $[[SP1:[0-9]+]], $zero, $sp
+; CHECK: subu  $[[T1:[0-9]+]], $sp, $[[SZ]]
+; CHECK: addu  $sp, $zero, $[[T1]]
+; CHECK: addu  $[[SP2:[0-9]+]], $zero, $sp
+; CHECK: lw  $25, %call16(foo)($gp)
+; CHECK: addiu $4, $[[SP1]], 24
+; CHECK: jalr  $25
+; CHECK: lw  $25, %call16(foo)($gp)
+; CHECK: addiu $4, $[[SP2]], 24
+; CHECK: jalr  $25
+  %tmp1 = alloca i8, i32 %size, align 4  ; first variable-sized stack block
+  %add.ptr = getelementptr inbounds i8* %tmp1, i32 5
+  store i8 97, i8* %add.ptr, align 1  ; writes byte 97 at offset 5 of the first block
+  %tmp4 = alloca i8, i32 %size, align 4  ; second variable-sized stack block
+  call void @foo2(double 1.000000e+00, double 2.000000e+00, i32 3) nounwind  ; call made after both allocas, before either block is passed on
+  %call = call i32 @foo(i8* %tmp1) nounwind
+  %call7 = call i32 @foo(i8* %tmp4) nounwind
+  %add = add nsw i32 %call7, %call  ; sum of the two @foo results is returned
+  ret i32 %add
+}
+
+declare void @foo2(double, double, i32)
+
+declare i32 @foo(i8*)
+
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
new file mode 100644
index 0000000..2d5555b
--- /dev/null
+++ b/test/CodeGen/Mips/atomic.ll
@@ -0,0 +1,253 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
+
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.swap.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
+
+
+@x = common global i32 0, align 4
+
+define i32 @AtomicLoadAdd32(i32 %incr) nounwind {  ; 32-bit atomic add on the global @x
+entry:
+  %0 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* @x, i32 %incr)  ; intrinsic result is returned directly
+  ret i32 %0
+
+; CHECK:   AtomicLoadAdd32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicLoadNand32(i32 %incr) nounwind {  ; 32-bit atomic nand on the global @x
+entry:
+  %0 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* @x, i32 %incr)  ; intrinsic result is returned directly
+  ret i32 %0
+
+; CHECK:   AtomicLoadNand32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   and     $[[R1]], $[[R1]], $4
+; CHECK:   nor     $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicSwap32(i32 %oldval) nounwind {  ; 32-bit atomic swap on the global @x
+entry:
+  %0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval)  ; despite its name, %oldval is the value passed in to be stored
+  ret i32 %0
+
+; CHECK:   AtomicSwap32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   sw      $4, [[OFFSET:[0-9]+]]($sp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   or      $2, $zero, $[[R1]]
+; CHECK:   lw      $[[R2:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R3:[0-9]+]], $zero, $[[R2]]
+; CHECK:   sc      $[[R3]], 0($[[R0]])
+; CHECK:   beq     $[[R3]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {  ; 32-bit atomic compare-and-swap on the global @x
+entry:
+  %0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval)  ; intrinsic result is returned directly
+  ret i32 %0
+
+; CHECK:   AtomicCmpSwap32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK:   sw      $5, [[OFFSET:[0-9]+]]($sp)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $2, 0($[[R0]])
+; CHECK:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; CHECK:   lw      $[[R1:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+; CHECK:   $[[BB1]]:
+}
+
+
+
+@y = common global i8 0, align 1
+
+define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {  ; 8-bit atomic add on the i8 global @y
+entry:
+  %0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @y, i8 %incr)  ; intrinsic result is returned directly
+  ret i8 %0
+
+; CHECK:   AtomicLoadAdd8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {  ; 8-bit atomic subtract on the i8 global @y
+entry:
+  %0 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @y, i8 %incr)  ; the expected output below negates the operand and then uses addu
+  ret i8 %0
+
+; CHECK:   AtomicLoadSub8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   subu    $[[R18:[0-9]+]], $zero, $4
+; CHECK:   andi    $[[R8:[0-9]+]], $[[R18]], 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {  ; 8-bit atomic nand on the i8 global @y
+entry:
+  %0 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @y, i8 %incr)  ; intrinsic result is returned directly
+  ret i8 %0
+
+; CHECK:   AtomicLoadNand8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind {  ; 8-bit atomic swap on the i8 global @y
+entry:
+  %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval)  ; despite its name, %oldval is the value passed in to be stored
+  ret i8 %0
+
+; CHECK:   AtomicSwap8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK:   sw      $[[R9]], [[OFFSET:[0-9]+]]($sp)
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   lw      $[[R18:[0-9]+]], [[OFFSET]]($sp)
+; CHECK:   or      $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK:   sc      $[[R14]], 0($[[R2]])
+; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
+
+; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {  ; 8-bit atomic compare-and-swap on the i8 global @y
+entry:
+  %0 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @y, i8 %oldval, i8 %newval)  ; intrinsic result is returned directly
+  ret i8 %0
+
+; CHECK:   AtomicCmpSwap8:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK:   sll     $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK:   sll     $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK:   andi    $[[R10:[0-9]+]], $5, 255
+; CHECK:   sll     $[[R11:[0-9]+]], $[[R10]], $[[R4]]
+
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R12:[0-9]+]], 0($[[R2]])
+; CHECK:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
+; CHECK:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
+
+; CHECK:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
+; CHECK:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
+; CHECK:   sc      $[[R15]], 0($[[R2]])
+; CHECK:   beq     $[[R15]], $zero, $[[BB0]]
+
+; CHECK:   $[[BB1]]:
+; CHECK:   srl     $[[R16:[0-9]+]], $[[R13]], $[[R4]]
+; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK:   sra     $2, $[[R17]], 24
+}
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index e9af304..6de6b77 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -8,14 +8,14 @@ entry:
   ret i8* %x
 }
 
-; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp1)($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp1)
-; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp2)($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp2)
-; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp1)
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp1)
-; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp2)
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp2)
+; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
+; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 8329c89..ec37961 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -4,8 +4,8 @@
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
 ; CHECK:  addiu ${{[0-9]+}}, $gp, %got(i1)
+; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -14,3 +14,19 @@ entry:
   ret i32* %cond
 }
 
+@c = global i32 1, align 4
+@d = global i32 0, align 4
+
+; CHECK: cmov2:
+; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; CHECK: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+define i32 @cmov2(i32 %s) nounwind readonly {  ; returns the value of @c when %s is non-zero, otherwise the value of @d
+entry:
+  %tobool = icmp ne i32 %s, 0
+  %tmp1 = load i32* @c, align 4
+  %tmp2 = load i32* @d, align 4
+  %cond = select i1 %tobool, i32 %tmp1, i32 %tmp2  ; both globals are loaded before the select
+  ret i32 %cond
+}
+
diff --git a/test/CodeGen/Mips/double2int.ll b/test/CodeGen/Mips/double2int.ll
new file mode 100644
index 0000000..3d033e1
--- /dev/null
+++ b/test/CodeGen/Mips/double2int.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+define i32 @f1(double %d) nounwind readnone {  ; converts a double argument to a signed 32-bit integer
+entry:
+; CHECK: trunc.w.d $f{{[0-9]+}}, $f12
+  %conv = fptosi double %d to i32  ; the expectation above requires this to be emitted as trunc.w.d reading $f12
+  ret i32 %conv
+}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
new file mode 100644
index 0000000..765b778
--- /dev/null
+++ b/test/CodeGen/Mips/eh.ll
@@ -0,0 +1,78 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EL
+; RUN: llc  < %s -march=mips   -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EB
+
+@g1 = global double 0.000000e+00, align 8
+@_ZTId = external constant i8*
+
+define void @_Z1fd(double %i2) {  ; throws a double exception and catches it in the same function
+entry:
+; CHECK-EL:  addiu $sp, $sp
+; CHECK-EL:  .cfi_def_cfa_offset
+; CHECK-EL:  sdc1 $f20
+; CHECK-EL:  sw  $ra
+; CHECK-EL:  sw  $17
+; CHECK-EL:  sw  $16
+; CHECK-EL:  .cfi_offset 52, -8
+; CHECK-EL:  .cfi_offset 53, -4
+; CHECK-EB:  .cfi_offset 53, -8
+; CHECK-EB:  .cfi_offset 52, -4
+; CHECK-EL:  .cfi_offset 31, -12
+; CHECK-EL:  .cfi_offset 17, -16
+; CHECK-EL:  .cfi_offset 16, -20
+; CHECK-EL:  .cprestore 
+
+  %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind  ; 8 bytes: room for one double
+  %0 = bitcast i8* %exception to double*
+  store double 3.200000e+00, double* %0, align 8, !tbaa !0  ; the thrown value
+  invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTId to i8*), i8* null) noreturn
+          to label %unreachable unwind label %lpad
+
+lpad:                                             ; preds = %entry
+; CHECK-EL:  # %lpad
+; CHECK-EL:  lw  $gp
+; CHECK-EL:  beq $5
+
+  %exn = tail call i8* @llvm.eh.exception() nounwind
+  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTId to i8*)) nounwind
+  %1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTId to i8*)) nounwind
+  %2 = icmp eq i32 %eh.selector, %1  ; does the selector match the type id of @_ZTId?
+  br i1 %2, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  %3 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind
+  %4 = bitcast i8* %3 to double*
+  %exn.scalar = load double* %4, align 8  ; read the caught double
+  %add = fadd double %exn.scalar, %i2
+  store double %add, double* @g1, align 8, !tbaa !0  ; @g1 = caught value + %i2
+  tail call void @__cxa_end_catch() nounwind
+  ret void
+
+eh.resume:                                        ; preds = %lpad
+  tail call void @llvm.eh.resume(i8* %exn, i32 %eh.selector) noreturn
+  unreachable
+
+unreachable:                                      ; preds = %entry
+  unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
+
+declare void @llvm.eh.resume(i8*, i32)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
new file mode 100644
index 0000000..14c6507
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -0,0 +1,55 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EL
+; RUN: llc  < %s -march=mips -mcpu=4ke | FileCheck %s -check-prefix=CHECK-EB
+
+define double @func0(double %d0, double %d1) nounwind readnone {  ; double-precision copysign; expectations exist for both run lines
+entry:
+; CHECK-EL: func0:
+; CHECK-EL: lui $[[T0:[0-9]+]], 32767
+; CHECK-EL: lui $[[T1:[0-9]+]], 32768
+; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13
+; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15
+; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
+; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
+; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
+; CHECK-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; CHECK-EL: mtc1 $[[LO0]], $f0
+; CHECK-EL: mtc1 $[[OR]], $f1
+;
+; CHECK-EB: lui $[[T0:[0-9]+]], 32767
+; CHECK-EB: lui $[[T1:[0-9]+]], 32768
+; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
+; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
+; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
+; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
+; CHECK-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
+; CHECK-EB: mtc1 $[[OR]], $f0
+; CHECK-EB: mtc1 $[[LO0]], $f1
+  %call = tail call double @copysign(double %d0, double %d1) nounwind readnone  ; the expected output contains no call; the masks are built and combined inline
+  ret double %call
+}
+
+declare double @copysign(double, double) nounwind readnone
+
+define float @func1(float %f0, float %f1) nounwind readnone {  ; single-precision copysign; only the mipsel run has expectations here
+entry:
+; CHECK-EL: func1:
+; CHECK-EL: lui $[[T0:[0-9]+]], 32767
+; CHECK-EL: lui $[[T1:[0-9]+]], 32768
+; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
+; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
+; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
+; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
+; CHECK-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; CHECK-EL: mtc1 $[[T4]], $f0
+  %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone  ; the expected output contains no call; the masks are built and combined inline
+  ret float %call
+}
+
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll
new file mode 100644
index 0000000..c48ce7e
--- /dev/null
+++ b/test/CodeGen/Mips/frame-address.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+define i8* @f() nounwind {  ; returns the result of llvm.frameaddress for level 0
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)  ; argument 0 requests the current function's frame
+  ret i8* %0
+
+; CHECK:   addu    $fp, $sp, $zero
+; CHECK:   addu    $2, $zero, $fp
+}
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
new file mode 100644
index 0000000..ee7e131
--- /dev/null
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+@p = external global i32
+@q = external global i32
+@r = external global i32
+
+define void @f0() nounwind {  ; three calls in sequence, with loads of external globals between them
+entry:
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+; CHECK: jalr
+; CHECK-NOT: got({{.*}})($gp)
+; CHECK: lw $gp
+  tail call void (...)* @f1() nounwind
+  %tmp = load i32* @p, align 4  ; global read that follows the first call
+  tail call void @f2(i32 %tmp) nounwind
+  %tmp1 = load i32* @q, align 4  ; global reads that follow the second call
+  %tmp2 = load i32* @r, align 4
+  tail call void @f3(i32 %tmp1, i32 %tmp2) nounwind
+  ret void
+}
+
+declare void @f1(...)
+
+declare void @f2(i32)
+
+declare void @f3(i32, i32)
+
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
new file mode 100644
index 0000000..9a30453
--- /dev/null
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {  ; passes i64 values at different argument positions to three callees
+entry:
+; CHECK: addu $[[R1:[0-9]+]], $zero, $5
+; CHECK: addu $[[R0:[0-9]+]], $zero, $4
+; CHECK: lw  $25, %call16(ff1)
+; CHECK: ori $6, ${{[0-9]+}}, 3855
+; CHECK: ori $7, ${{[0-9]+}}, 22136
+; CHECK: jalr
+  tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind  ; i64 constant as the second argument
+; CHECK: lw $25, %call16(ff2)
+; CHECK: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 84($sp)
+; CHECK: addu $4, $zero, $[[R2]]
+; CHECK: addu $5, $zero, $[[R3]]
+; CHECK: jalr $25
+  tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind  ; i64 as the first argument, followed by a double
+  %sub = add nsw i32 %i, -1
+; CHECK: sw $[[R0]], 24($sp)
+; CHECK: sw $[[R1]], 28($sp)
+; CHECK: lw $25, %call16(ff3)
+; CHECK: addu $6, $zero, $[[R2]]
+; CHECK: addu $7, $zero, $[[R3]]
+; CHECK: jalr $25
+  tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind  ; two i64 arguments interleaved with i32 arguments
+  ret void
+}
+
+declare void @ff1(i32, i64)
+
+declare void @ff2(i64, double)
+
+declare void @ff3(i32, i64, i32, i64)
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index fdfa01a..50d0993 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc  < %s -march=mips | FileCheck %s
+; RUN: llc  < %s -march=mipsel -mcpu=4ke  | FileCheck %s
 
 @caller.sf1 = internal unnamed_addr global void (...)* null, align 4
 @gf1 = external global void (...)*
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
new file mode 100644
index 0000000..fd7ae9e
--- /dev/null
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+%struct.S1 = type { [65536 x i8] }
+
+@s1 = external global %struct.S1
+
+define void @f() nounwind {  ; copies the 65536-byte @s1 into a stack temporary and passes it byval to @f2
+entry:
+; CHECK:  lui $at, 65534
+; CHECK:  addu  $at, $sp, $at
+; CHECK:  addiu $sp, $at, -16
+; CHECK:  .cprestore  65536
+
+  %agg.tmp = alloca %struct.S1, align 1  ; %struct.S1 wraps [65536 x i8]
+  %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
+  call void @f2(%struct.S1* byval %agg.tmp) nounwind
+  ret void
+}
+
+declare void @f2(%struct.S1* byval)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
new file mode 100644
index 0000000..b78c393
--- /dev/null
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -0,0 +1,127 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+%0 = type { i8, i16, i32, i64, double, i32, [4 x i8] }
+%struct.S1 = type { i8, i16, i32, i64, double, i32 }
+%struct.S2 = type { [4 x i32] }
+%struct.S3 = type { i8 }
+
+@f1.s1 = internal unnamed_addr constant %0 { i8 1, i16 2, i32 3, i64 4, double 5.000000e+00, i32 6, [4 x i8] undef }, align 8
+@f1.s2 = internal unnamed_addr constant %struct.S2 { [4 x i32] [i32 7, i32 8, i32 9, i32 10] }, align 4
+
+define void @f1() nounwind {  ; caller side: passes structs byval to three callees
+entry:
+; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)($gp)
+; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
+; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
+; CHECK: sw  $[[R2]], 16($sp)
+; CHECK: sw  $[[R7]], 20($sp)
+; CHECK: sw  $[[R3]], 24($sp)
+; CHECK: sw  $[[R4]], 28($sp)
+; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: sw  $[[R6]], 36($sp)
+; CHECK: lw  $6, 0($[[R0]])
+; CHECK: lw  $7, 4($[[R0]])
+  %agg.tmp10 = alloca %struct.S3, align 4  ; stack temporary for the one-byte %struct.S3
+  call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind  ; float followed by a byval %struct.S1
+  call void @callee2(%struct.S2* byval @f1.s2) nounwind  ; byval %struct.S2 as the only argument
+  %tmp11 = getelementptr inbounds %struct.S3* %agg.tmp10, i32 0, i32 0
+  store i8 11, i8* %tmp11, align 4  ; initialise the single i8 member before the call
+  call void @callee3(float 2.100000e+01, %struct.S3* byval %agg.tmp10, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind  ; float followed by two byval structs
+  ret void
+}
+
+declare void @callee1(float, %struct.S1* byval)
+
+declare void @callee2(%struct.S2* byval)
+
+declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
+
+define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {  ; callee side: reads every member of a byval %struct.S1
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
+; CHECK: lw  $[[R2:[0-9]+]], 68($sp)
+; CHECK: lh  $[[R1:[0-9]+]], 66($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 64($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
+; CHECK: lw  $4, 88($sp)
+; CHECK: sw  $[[R3]], 16($sp)
+; CHECK: sw  $[[R4]], 20($sp)
+; CHECK: sw  $[[R2]], 24($sp)
+; CHECK: sw  $[[R1]], 28($sp)
+; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: mfc1 $6, $f[[F0]]
+
+  %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5  ; members are read in reverse order, index 5 down to 0
+  %tmp = load i32* %i2, align 4, !tbaa !0
+  %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4
+  %tmp1 = load double* %d, align 8, !tbaa !3
+  %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3
+  %tmp2 = load i64* %ll, align 8, !tbaa !4
+  %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
+  %tmp3 = load i32* %i, align 4, !tbaa !0
+  %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1
+  %tmp4 = load i16* %s, align 2, !tbaa !5
+  %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0
+  %tmp5 = load i8* %c, align 1, !tbaa !1
+  tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind  ; all six members plus %f are forwarded
+  ret void
+}
+
+declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
+
+define void @f3(%struct.S2* nocapture byval %s2) nounwind {  ; callee side: reads array elements 0 and 3 of a byval %struct.S2
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $4, 56($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
+; CHECK: lw  $4, 56($sp)
+; CHECK: sw  $[[R0]], 24($sp)
+
+  %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0  ; first element of the [4 x i32] member
+  %tmp = load i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3  ; last element of the [4 x i32] member
+  %tmp3 = load i32* %arrayidx2, align 4, !tbaa !0
+  tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind  ; remaining arguments are constants
+  ret void
+}
+
+define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {  ; callee side: reads members from two byval structs
+entry:
+; CHECK: addiu $sp, $sp, -56
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $7, 68($sp)
+; CHECK: lw  $[[R1:[0-9]+]], 88($sp)
+; CHECK: lb  $[[R0:[0-9]+]], 60($sp)
+; CHECK: lw  $4, 68($sp)
+; CHECK: sw  $[[R1]], 24($sp)
+; CHECK: sw  $[[R0]], 32($sp)
+
+  %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2  ; i32 member at index 2 of %s1
+  %tmp = load i32* %i, align 4, !tbaa !0
+  %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5  ; i32 member at index 5 of %s1
+  %tmp1 = load i32* %i2, align 4, !tbaa !0
+  %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0  ; the single i8 member of %s3
+  %tmp2 = load i8* %c, align 1, !tbaa !1
+  tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind  ; %f itself is not forwarded
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !"long long", metadata !1}
+!5 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 1f71ed2..14ce04b 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,5 +1,4 @@
 ; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
-; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
 
 
 ; All test functions do the same thing - they return the first variable
@@ -30,11 +29,11 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va1:
-; CHECK: addiu   $sp, $sp, -32
-; CHECK: sw      $7, 44($sp)
-; CHECK: sw      $6, 40($sp)
-; CHECK: sw      $5, 36($sp)
-; CHECK: lw      $2, 36($sp)
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: sw      $5, 20($sp)
+; CHECK: lw      $2, 20($sp)
 }
 
 ; check whether the variable double argument will be accessed from the 8-byte
@@ -56,11 +55,11 @@ entry:
   ret double %tmp
 
 ; CHECK: va2:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: sw      $6, 48($sp)
-; CHECK: sw      $5, 44($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 44
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: sw      $5, 20($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 20
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -84,10 +83,10 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va3:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: sw      $6, 48($sp)
-; CHECK: lw      $2, 48($sp)
+; CHECK: addiu   $sp, $sp, -16
+; CHECK: sw      $7, 28($sp)
+; CHECK: sw      $6, 24($sp)
+; CHECK: lw      $2, 24($sp)
 }
 
 ; double
@@ -107,14 +106,11 @@ entry:
   ret double %tmp
 
 ; CHECK: va4:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: sw      $7, 60($sp)
-; CHECK: sw      $6, 56($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 56
-; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
-; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
-; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
-; CHECK: ldc1    $f0, 0($[[R3]])
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: sw      $6, 32($sp)
+; CHECK: addiu   ${{[0-9]+}}, $sp, 32
+; CHECK: ldc1    $f0, 32($sp)
 }
 
 ; int
@@ -138,9 +134,9 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va5:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $7, 52($sp)
-; CHECK: lw      $2, 52($sp)
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: lw      $2, 36($sp)
 }
 
 ; double
@@ -164,9 +160,9 @@ entry:
   ret double %tmp
 
 ; CHECK: va6:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: sw      $7, 60($sp)
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 60
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: sw      $7, 36($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 36
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -192,8 +188,8 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va7:
-; CHECK: addiu   $sp, $sp, -40
-; CHECK: lw      $2, 56($sp)
+; CHECK: addiu   $sp, $sp, -24
+; CHECK: lw      $2, 40($sp)
 }
 
 ; double
@@ -215,12 +211,9 @@ entry:
   ret double %tmp
 
 ; CHECK: va8:
-; CHECK: addiu   $sp, $sp, -48
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 64
-; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
-; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
-; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
-; CHECK: ldc1    $f0, 0($[[R3]])
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: addiu   ${{[0-9]+}}, $sp, 48
+; CHECK: ldc1    $f0, 48($sp)
 }
 
 ; int
@@ -244,8 +237,8 @@ entry:
   ret i32 %tmp
 
 ; CHECK: va9:
-; CHECK: addiu   $sp, $sp, -56
-; CHECK: lw      $2, 76($sp)
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: lw      $2, 52($sp)
 }
 
 ; double
@@ -269,8 +262,8 @@ entry:
   ret double %tmp
 
 ; CHECK: va10:
-; CHECK: addiu   $sp, $sp, -56
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 76
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 52
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
new file mode 100644
index 0000000..034738b
--- /dev/null
+++ b/test/CodeGen/Mips/tls.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=mipsel -mcpu=mips2 -relocation-model=static < %s \
+; RUN:                             | FileCheck %s -check-prefix=STATIC
+
+
+@t1 = thread_local global i32 0, align 4
+
+define i32 @f1() nounwind {
+entry:
+  %tmp = load i32* @t1, align 4
+  ret i32 %tmp
+
+; PIC:   f1:
+; STATIC:   f1:
+; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
+; PIC:   addiu   $4, $gp, %tlsgd(t1)
+; PIC:   jalr    $25
+; PIC:   lw      $2, 0($2)
+
+; STATIC:   rdhwr   $3, $29
+; STATIC:   lui     $[[R0:[0-9]+]], %tprel_hi(t1)
+; STATIC:   addiu   $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
+; STATIC:   addu    $[[R2:[0-9]+]], $3, $[[R1]]
+; STATIC:   lw      $2, 0($[[R2]])
+}
+
+
+@t2 = external thread_local global i32
+
+define i32 @f2() nounwind {
+entry:
+  %tmp = load i32* @t2, align 4
+  ret i32 %tmp
+
+; PIC:   f2:
+; STATIC:   f2:
+; PIC:   lw      $25, %call16(__tls_get_addr)($gp)
+; PIC:   addiu   $4, $gp, %tlsgd(t2)
+; PIC:   jalr    $25
+; PIC:   lw      $2, 0($2)
+
+; STATIC:   rdhwr   $3, $29
+; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
+; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
+; STATIC:   lw      $2, 0($[[R1]])
+}
diff --git a/test/CodeGen/Mips/weak.ll b/test/CodeGen/Mips/weak.ll
new file mode 100644
index 0000000..09dd2a4
--- /dev/null
+++ b/test/CodeGen/Mips/weak.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+@t = common global i32 (...)* null, align 4
+
+define void @f() nounwind {
+entry:
+  store i32 (...)* @test_weak, i32 (...)** @t, align 4
+  ret void
+}
+
+; CHECK: .weak test_weak
+declare extern_weak i32 @test_weak(...)
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
new file mode 100644
index 0000000..2f793de
--- /dev/null
+++ b/test/CodeGen/PTX/cvt.ll
@@ -0,0 +1,234 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+; preds 
+; (note: we convert back to i32 to return)
+
+define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
+; CHECK: cvt.pred.u16 p0, rh1;
+; CHECK: ret;
+	%a = trunc i16 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
+; CHECK: cvt.pred.u32 p0, r1;
+; CHECK: ret;
+	%a = trunc i32 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
+; CHECK: cvt.pred.u64 p0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
+; CHECK: cvt.rni.pred.f32 p0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
+; CHECK: cvt.rni.pred.f64 p0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
+; i16
+
+define ptx_device i16 @cvt_i16_preds(i1 %x) {
+; CHECK: cvt.u16.pred rh0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_i32(i32 %x) {
+; CHECK: cvt.u16.u32 rh0, r1;
+; CHECK: ret;
+	%a = trunc i32 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_i64(i64 %x) {
+; CHECK: cvt.u16.u64 rh0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_f32(float %x) {
+; CHECK: cvt.rni.u16.f32 rh0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i16
+	ret i16 %a
+}
+
+define ptx_device i16 @cvt_i16_f64(double %x) {
+; CHECK: cvt.rni.u16.f64 rh0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i16
+	ret i16 %a
+}
+
+; i32
+
+define ptx_device i32 @cvt_i32_preds(i1 %x) {
+; CHECK: cvt.u32.pred r0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_i16(i16 %x) {
+; CHECK: cvt.u32.u16 r0, rh1;
+; CHECK: ret;
+	%a = zext i16 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_i64(i64 %x) {
+; CHECK: cvt.u32.u64 r0, rd1;
+; CHECK: ret;
+	%a = trunc i64 %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_f32(float %x) {
+; CHECK: cvt.rni.u32.f32 r0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i32
+	ret i32 %a
+}
+
+define ptx_device i32 @cvt_i32_f64(double %x) {
+; CHECK: cvt.rni.u32.f64 r0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i32
+	ret i32 %a
+}
+
+; i64
+
+define ptx_device i64 @cvt_i64_preds(i1 %x) {
+; CHECK: cvt.u64.pred rd0, p1;
+; CHECK: ret;
+	%a = zext i1 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_i16(i16 %x) {
+; CHECK: cvt.u64.u16 rd0, rh1;
+; CHECK: ret;
+	%a = zext i16 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_i32(i32 %x) {
+; CHECK: cvt.u64.u32 rd0, r1;
+; CHECK: ret;
+	%a = zext i32 %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_f32(float %x) {
+; CHECK: cvt.rni.u64.f32 rd0, f1;
+; CHECK: ret;
+	%a = fptoui float %x to i64
+	ret i64 %a
+}
+
+define ptx_device i64 @cvt_i64_f64(double %x) {
+; CHECK: cvt.rni.u64.f64 rd0, fd1;
+; CHECK: ret;
+	%a = fptoui double %x to i64
+	ret i64 %a
+}
+
+; f32
+
+define ptx_device float @cvt_f32_preds(i1 %x) {
+; CHECK: cvt.rn.f32.pred f0, p1;
+; CHECK: ret;
+	%a = uitofp i1 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i16(i16 %x) {
+; CHECK: cvt.rn.f32.u16 f0, rh1;
+; CHECK: ret;
+	%a = uitofp i16 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i32(i32 %x) {
+; CHECK: cvt.rn.f32.u32 f0, r1;
+; CHECK: ret;
+	%a = uitofp i32 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_i64(i64 %x) {
+; CHECK: cvt.rn.f32.u64 f0, rd1;
+; CHECK: ret;
+	%a = uitofp i64 %x to float
+	ret float %a
+}
+
+define ptx_device float @cvt_f32_f64(double %x) {
+; CHECK: cvt.rn.f32.f64 f0, fd1;
+; CHECK: ret;
+	%a = fptrunc double %x to float
+	ret float %a
+}
+
+; f64
+
+define ptx_device double @cvt_f64_preds(i1 %x) {
+; CHECK: cvt.rn.f64.pred fd0, p1;
+; CHECK: ret;
+	%a = uitofp i1 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i16(i16 %x) {
+; CHECK: cvt.rn.f64.u16 fd0, rh1;
+; CHECK: ret;
+	%a = uitofp i16 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i32(i32 %x) {
+; CHECK: cvt.rn.f64.u32 fd0, r1;
+; CHECK: ret;
+	%a = uitofp i32 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_i64(i64 %x) {
+; CHECK: cvt.rn.f64.u64 fd0, rd1;
+; CHECK: ret;
+	%a = uitofp i64 %x to double
+	ret double %a
+}
+
+define ptx_device double @cvt_f64_f32(float %x) {
+; CHECK: cvt.f64.f32 fd0, f1;
+; CHECK: ret;
+	%a = fpext float %x to double
+	ret double %a
+}
diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll
new file mode 100644
index 0000000..22eeda3
--- /dev/null
+++ b/test/CodeGen/PTX/fneg.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x) {
+; CHECK: neg.f32 f0, f1;
+; CHECK-NEXT: ret;
+	%y = fsub float -0.000000e+00, %x
+	ret float %y
+}
+
+define ptx_device double @t1_f64(double %x) {
+; CHECK: neg.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+	%y = fsub double -0.000000e+00, %x
+	ret double %y
+}
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
new file mode 100644
index 0000000..ad7b341
--- /dev/null
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | not grep "mad"
+
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index ac33fef..92effa6 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
 ; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
 ; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
+; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3"
 ; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
 ; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
 ; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll
new file mode 100644
index 0000000..6f1b03e
--- /dev/null
+++ b/test/CodeGen/PTX/selp.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
+; CHECK: selp.u32 r0, r1, r2, p1;
+	%a = select i1 %x, i32 %y, i32 %z
+	ret i32 %a
+}
+
+define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
+; CHECK: selp.u64 rd0, rd1, rd2, p1;
+	%a = select i1 %x, i64 %y, i64 %z
+	ret i64 %a
+}
+
+define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
+; CHECK: selp.f32 f0, f1, f2, p1;
+	%a = select i1 %x, float %y, float %z
+	ret float %a
+}
+
+define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
+; CHECK: selp.f64 fd0, fd1, fd2, p1;
+	%a = select i1 %x, double %y, double %z
+	ret double %a
+}
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index d094509..6b31397 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 -join-physregs | FileCheck %s
 ; ModuleID = 'nn.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin11.0"
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index e46e1ec..318ccb0 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -1,14 +1,12 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 6, 3}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 4, 2}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {li 3, 0}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN:   grep {mr 5, 3}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
 
 declare void @bar(i64 %x, i64 %y)
 
+; CHECK: li 4, 2
+; CHECK: li {{[53]}}, 0
+; CHECK: li 6, 3
+; CHECK: mr {{[53]}}, {{[53]}}
+
 define void @foo() {
   call void @bar(i64 2, i64 3)
   ret void
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ac56625..29c620e 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -relocation-model=pic -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=PIC
 ; RUN: llc < %s -relocation-model=static -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic -march=ppc64 -mtriple=powerpc64-apple-darwin | FileCheck %s -check-prefix=PPC64
 
 @nextaddr = global i8* null                       ; <i8**> [#uses=2]
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
@@ -7,6 +8,7 @@
 define internal i32 @foo(i32 %i) nounwind {
 ; PIC: foo:
 ; STATIC: foo:
+; PPC64: foo:
 entry:
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
@@ -18,6 +20,8 @@ bb2:                                              ; preds = %entry, %bb3
 ; PIC-NEXT: bctr
 ; STATIC: mtctr
 ; STATIC-NEXT: bctr
+; PPC64: mtctr
+; PPC64-NEXT: bctr
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 bb3:                                              ; preds = %entry
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
index 506d3a8..5393392 100644
--- a/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -6,7 +6,6 @@ define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32
 entry:
 ;CHECK: make_foo
 ;CHECK: ld [%fp+64], {{.+}}
-;CHECK: or {{.+}}, {{.+}}, %i0
 ;CHECK: jmp %i7+12
   %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
   store i32 %a, i32* %0, align 4
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index d6ca0d7..7876557 100644
--- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -11,7 +11,7 @@
 
 define i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
 ; CHECK: t:
-; CHECK: adds r0, #8
+; CHECK: adds {{r[0-7]}}, #8
 entry:
   %val = alloca i64, align 4                      ; <i64*> [#uses=3]
   %0 = icmp eq %struct.asl_file_t* %s, null       ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
new file mode 100644
index 0000000..ed55bb5
--- /dev/null
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s
+; rdar://problem/9416774
+; ModuleID = 'reduced.ll'
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-ios"
+
+%struct.MMMMMMMMMMMM = type { [4 x %struct.RRRRRRRR] }
+%struct.RRRRRRRR = type { [78 x i32] }
+
+@kkkkkk = external constant i8*
+@__PRETTY_FUNCTION__._ZN12CLGll = private unnamed_addr constant [62 x i8] c"static void tttttttttttt::lllllllllllll(const MMMMMMMMMMMM &)\00"
+@.str = private unnamed_addr constant [75 x i8] c"\09GGGGGGGGGGGGGGGGGGGGGGG:,BE:0x%08lx,ALM:0x%08lx,LTO:0x%08lx,CBEE:0x%08lx\0A\00"
+
+define void @_ZN12CLGll(%struct.MMMMMMMMMMMM* %aidData) ssp align 2 {
+entry:
+  %aidData.addr = alloca %struct.MMMMMMMMMMMM*, align 4
+  %agg.tmp = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp4 = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp10 = alloca %struct.RRRRRRRR, align 4
+  %agg.tmp16 = alloca %struct.RRRRRRRR, align 4
+  store %struct.MMMMMMMMMMMM* %aidData, %struct.MMMMMMMMMMMM** %aidData.addr, align 4
+  br label %do.body
+
+do.body:                                          ; preds = %entry
+  %tmp = load i8** @kkkkkk, align 4
+  %tmp1 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0
+  %tmp2 = bitcast %struct.RRRRRRRR* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.RRRRRRRR* %arrayidx to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 312, i32 4, i1 false)
+  %tmp5 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0
+  %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1
+  %tmp8 = bitcast %struct.RRRRRRRR* %agg.tmp4 to i8*
+  %tmp9 = bitcast %struct.RRRRRRRR* %arrayidx7 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp8, i8* %tmp9, i32 312, i32 4, i1 false)
+  %tmp11 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0
+  %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2
+  %tmp14 = bitcast %struct.RRRRRRRR* %agg.tmp10 to i8*
+  %tmp15 = bitcast %struct.RRRRRRRR* %arrayidx13 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp14, i8* %tmp15, i32 312, i32 4, i1 false)
+  %tmp17 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0
+  %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3
+  %tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8*
+  %tmp21 = bitcast %struct.RRRRRRRR* %arrayidx19 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp20, i8* %tmp21, i32 312, i32 4, i1 false)
+  call void (i8*, i32, i8*, i8*, ...)* @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
+  br label %do.end
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+declare void @CLLoggingLog(i8*, i32, i8*, i8*, ...)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 3594424..9aee910 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -12,7 +12,7 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
-; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
+; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
 ; CHECK-NEXT: ldmia.w sp!,
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 41f7f29..47d7a9c 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -7,8 +7,8 @@ entry:
 ; CHECK: Callee:
 ; CHECK: push
 ; CHECK: mov r4, sp
-; CHECK: sub.w r12, r4, #1000
-; CHECK: mov sp, r12
+; CHECK: sub.w [[R12:r[0-9]+]], r4, #1000
+; CHECK: mov sp, [[R12]]
   %0 = icmp eq i32 %i, 0                          ; <i1> [#uses=1]
   br i1 %0, label %bb2, label %bb
 
diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
new file mode 100644
index 0000000..9e6d78e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }
+
+; CHECK: Perl_ck_sort
+; CHECK: ldr
+; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]]
+; CHECK: str {{(r[0-9]+)|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
+
+define void @Perl_ck_sort() nounwind optsize {
+entry:
+  %tmp27 = load %struct.op** undef, align 4
+  switch i16 undef, label %if.end151 [
+    i16 178, label %if.then60
+    i16 177, label %if.then60
+  ]
+
+if.then60:                                        ; preds = %if.then40
+  br i1 undef, label %if.then67, label %if.end95
+
+if.then67:                                        ; preds = %if.then60
+  %op_next71 = getelementptr inbounds %struct.op* %tmp27, i32 0, i32 0
+  store %struct.op* %tmp27, %struct.op** %op_next71, align 4
+  %0 = getelementptr inbounds %struct.op* %tmp27, i32 1, i32 0
+  br label %if.end95
+
+if.end95:                                         ; preds = %if.else92, %if.then67
+  %.pre-phi = phi %struct.op** [ undef, %if.then60 ], [ %0, %if.then67 ]
+  %tmp98 = load %struct.op** %.pre-phi, align 4
+  br label %if.end151
+
+if.end151:                                        ; preds = %if.end100, %if.end, %entry
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 0e76770..3612e27 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -30,9 +30,8 @@ entry:
 define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
 entry:
 ; CHECK: f3
-; CHECK: lsrs  r2, r0, #7
-; CHECK: mov r0, r1
-; CHECK: bfi r0, r2, #7, #16
+; CHECK: lsrs {{.*}}, #7
+; CHECK: bfi {{.*}}, #7, #16
   %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
   %and2 = and i32 %B, -8388481                    ; <i32> [#uses=1]
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
@@ -42,8 +41,8 @@ entry:
 ; rdar://8752056
 define i32 @f4(i32 %a) nounwind {
 ; CHECK: f4
-; CHECK: movw r1, #3137
-; CHECK: bfi r1, r0, #15, #5
+; CHECK: movw [[R1:r[0-9]+]], #3137
+; CHECK: bfi [[R1]], {{.*}}, #15, #5
   %1 = shl i32 %a, 15
   %ins7 = and i32 %1, 1015808
   %ins12 = or i32 %ins7, 3137
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 10a4985..0992fa8 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -3,26 +3,29 @@
 
 declare double @floor(double) nounwind readnone
 
-define void @t(i1 %a, double %b) {
+define void @t(i32 %c, double %b) {
 entry:
-  br i1 %a, label %bb3, label %bb1
+  %cmp1 = icmp ne i32 %c, 0
+  br i1 %cmp1, label %bb3, label %bb1
 
 bb1:                                              ; preds = %entry
   unreachable
 
 bb3:                                              ; preds = %entry
-  br i1 %a, label %bb7, label %bb5
+  %cmp2 = icmp ne i32 %c, 0
+  br i1 %cmp2, label %bb7, label %bb5
 
 bb5:                                              ; preds = %bb3
   unreachable
 
 bb7:                                              ; preds = %bb3
-  br i1 %a, label %bb11, label %bb9
+  %cmp3 = icmp ne i32 %c, 0
+  br i1 %cmp3, label %bb11, label %bb9
 
 bb9:                                              ; preds = %bb7
-; CHECK:      cmp r0, #0
-; CHECK:      cmp r0, #0
-; CHECK-NEXT: cbnz
+; CHECK:      cmp	r0, #0
+; CHECK:      cmp	r0, #0
+; CHECK-NEXT:      cbnz
   %0 = tail call  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
 
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index eeaaa7fb..df221b9 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 63249f4..da12114 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 55c321d..15052e0 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 69f0383..566408a 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
 
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index 0f122f2..cdd3489 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index d905217..47f553f 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
 
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index db202dd..405b3bb 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+
+; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
 define i1 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 35b0159..6c5a4fb 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | grep setnp
-; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse | grep setnp
+; RUN: llc < %s -march=x86 -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | \
 ; RUN:   not grep setnp
 
 define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
deleted file mode 100644
index dee7415..0000000
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim -post-RA-scheduler=false -asm-verbose=0 | FileCheck %s
-; PR2536
-
-; CHECK: andl    $65534, %
-; CHECK-NEXT: movl %
-; CHECK-NEXT: movzwl
-
-@g_5 = external global i16		; <i16*> [#uses=2]
-@g_107 = external global i16		; <i16*> [#uses=1]
-@g_229 = external global i32		; <i32*> [#uses=1]
-@g_227 = external global i16		; <i16*> [#uses=1]
-
-define i32 @func_54(i32 %p_55, i16 zeroext  %p_56) nounwind  {
-entry:
-	load i16* @g_5, align 2		; <i16>:0 [#uses=1]
-	zext i16 %0 to i32		; <i32>:1 [#uses=1]
-	%.mask = and i32 %1, 65534		; <i32> [#uses=1]
-	icmp eq i32 %.mask, 0		; <i1>:2 [#uses=1]
-	load i32* @g_229, align 4		; <i32>:3 [#uses=1]
-	load i16* @g_227, align 2		; <i16>:4 [#uses=1]
-	icmp eq i16 %4, 0		; <i1>:5 [#uses=1]
-	load i16* @g_5, align 2		; <i16>:6 [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb7.preheader, %entry
-	%indvar4 = phi i32 [ 0, %entry ], [ %indvar.next5, %bb7.preheader ]		; <i32> [#uses=1]
-	%p_56_addr.1.reg2mem.0 = phi i16 [ %p_56, %entry ], [ %p_56_addr.0, %bb7.preheader ]		; <i16> [#uses=2]
-	br i1 %2, label %bb7.preheader, label %bb5
-
-bb5:		; preds = %bb
-	store i16 %6, i16* @g_107, align 2
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb5, %bb
-	icmp eq i16 %p_56_addr.1.reg2mem.0, 0		; <i1>:7 [#uses=1]
-	%.0 = select i1 %7, i32 1, i32 %3		; <i32> [#uses=1]
-	urem i32 1, %.0		; <i32>:8 [#uses=1]
-	icmp eq i32 %8, 0		; <i1>:9 [#uses=1]
-	%.not = xor i1 %9, true		; <i1> [#uses=1]
-	%.not1 = xor i1 %5, true		; <i1> [#uses=1]
-	%brmerge = or i1 %.not, %.not1		; <i1> [#uses=1]
-	%iftmp.6.0 = select i1 %brmerge, i32 3, i32 0		; <i32> [#uses=1]
-	mul i32 %iftmp.6.0, %3		; <i32>:10 [#uses=1]
-	icmp eq i32 %10, 0		; <i1>:11 [#uses=1]
-	%p_56_addr.0 = select i1 %11, i16 %p_56_addr.1.reg2mem.0, i16 1		; <i16> [#uses=1]
-	%indvar.next5 = add i32 %indvar4, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp eq i32 %indvar.next5, 17		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb25, label %bb
-
-bb25:		; preds = %bb7.preheader
-	ret i32 1
-}
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index b92c789..1d27fc5 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,5 +1,5 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ebp | count 9
 ; RUN: llc < %s | grep %ecx | count 5
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 00ab735..d423bfc 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,5 +1,5 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rbp | count 7
 ; RUN: llc < %s | grep %rcx | count 3
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 947a1f1..dfd165c 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,12 +1,32 @@
-; RUN: llc < %s -march=x86 -regalloc=linearscan | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
-; RUN: llc < %s -march=x86 -regalloc=fast       | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
-; RUN: llc < %s -march=x86 -regalloc=basic      | grep "#%ebp %esi %edx 8(%edi) %eax %bl"
-; RUN: llc < %s -march=x86 -regalloc=greedy     | grep "#%edx %edi %ebp 8(%esi) %eax %bl"
+; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast       | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=basic      | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=greedy     | FileCheck %s
 
-; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
+; The 1st, 2nd, 3rd and 5th registers must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
-; operand.  There are many combinations that work; this is what llc puts out now.
-; ModuleID = '<stdin>'
+; operand.
+;
+; CHECK: 1st=[[A1:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK: 2nd=[[A2:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT: [[A2]]
+; CHECK: 3rd=[[A3:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT: [[A2]]
+; CHECK-NOT: [[A3]]
+; CHECK: 5th=[[A5:%...]]
+; CHECK-NOT: [[A1]]
+; CHECK-NOT: [[A5]]
+; CHECK: =4th
+
+; The 6th operand is an 8-bit register, and it mustn't alias the 1st and 5th.
+; CHECK: 1%e[[S1:.]]x
+; CHECK: 5%e[[S5:.]]x
+; CHECK-NOT: %[[S1]]
+; CHECK-NOT: %[[S5]]
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 	%struct.foo = type { i32, i32, i8* }
@@ -19,7 +39,7 @@ entry:
 	%3 = load i32* %0, align 4		; <i32> [#uses=1]
 	%4 = load i32* %1, align 4		; <i32> [#uses=1]
 	%5 = load i8* %state, align 1		; <i8> [#uses=1]
-	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#$0 $1 $2 $3 $4 $5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
+	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
 	%asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0		; <i32> [#uses=1]
 	%asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1		; <i32> [#uses=1]
 	store i32 %asmresult1, i32* %0
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 5eba9b9..75e0b8a 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -4,7 +4,7 @@
 
 ; CHECK:         ## InlineAsm End
 ; CHECK-NEXT: BB0_2:
-; CHECK-NEXT:    movl	%esi, %eax
+; CHECK-NEXT:    {{movl	%esi, %eax|addl	%edi, %esi}}
 
 
 @"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00"		; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index 2853930..45fc269 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -28,5 +28,5 @@ lpad:		; preds = %cont, %entry
 }
 
 ; CHECK: call{{.*}}f
-; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT: movl %eax, %esi
+; CHECK: movl %eax, %esi
+; CHECK: call{{.*}}g
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 848af82..2fceab6 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,8 +1,10 @@
-; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
-; RUN: llc -march=x86-64 -O2 -regalloc=basic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -O2 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -O2 -regalloc=basic < %s | FileCheck %s
 ; Test to check .debug_loc support. This test case emits many debug_loc entries.
 
 ; CHECK: Loc expr size
+; CHECK-NEXT: .short
+; CHECK-NEXT: .Ltmp
 ; CHECK-NEXT: DW_OP_reg
 
 %0 = type { double }
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 6600cc3..7909d27 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -68,9 +68,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: Ldebug_loc0:
 ; CHECK-NEXT: .quad   Lfunc_begin0
 ; CHECK-NEXT: .quad   [[LABEL]]
-; CHECK-NEXT: .short  1
+; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
+; CHECK-NEXT: .short  Lset{{.*}}
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   85
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .quad   [[LABEL]]
 ; CHECK-NEXT: .quad   [[CLOBBER]]
-; CHECK-NEXT: .short  1
+; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
+; CHECK-NEXT: .short  Lset{{.*}}
+; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   83
+; CHECK-NEXT: Ltmp{{.*}}:
+\ No newline at end of file
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index 6db3ce1..bb1db59 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -22,7 +22,7 @@ bb:
 ; it is.
 ;
 ; CHECK: # %bb
-; CHECK: addq $64036, %rdi
+; CHECK: leaq	64036(%rdx), %rdi
 ; CHECK: rep;stosl
 
   %tmp5 = bitcast i32* %tmp4 to i8*
diff --git a/test/CodeGen/Generic/2011-02-12-shuffle.ll b/test/CodeGen/X86/2011-02-12-shuffle.ll
index b4d56d1..b4d56d1 100644
--- a/test/CodeGen/Generic/2011-02-12-shuffle.ll
+++ b/test/CodeGen/X86/2011-02-12-shuffle.ll
diff --git a/test/CodeGen/X86/2011-05-09-loaduse.ll b/test/CodeGen/X86/2011-05-09-loaduse.ll
new file mode 100644
index 0000000..8673d74
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+; The element extract below is expected to codegen without a pshufd shuffle.
+;CHECK: test
+;CHECK-NOT: pshufd
+;CHECK: ret
+define float @test(<4 x float>* %A) nounwind {
+entry:
+  %T = load <4 x float>* %A
+  %R = extractelement <4 x float> %T, i32 3
+  store <4 x float><float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>* %A
+  ret float %R
+}
+
diff --git a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
new file mode 100644
index 0000000..0f18f09
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.0"
+
+%struct.attrib = type { i32, i32 }
+%struct.dfa = type { [80 x i8], i32, %struct.state*, i32, i32, %struct.attrib*, i32, i32 }
+%struct.state = type { i32, [4 x i32] }
+
+@aux_temp = external global %struct.dfa, align 8
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+
+declare void @__memset_chk() nounwind
+
+define void @dfa_add_string() nounwind uwtable ssp {
+entry:
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %entry
+  %idxprom.i = add i64 0, 1
+  br i1 undef, label %land.end.thread.i, label %land.end.i
+
+land.end.thread.i:                                ; preds = %if.end.i
+  %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %cmp1710.i = icmp eq i64 %0, -1
+  br i1 %cmp1710.i, label %cond.false156.i, label %cond.true138.i
+
+land.end.i:                                       ; preds = %if.end.i
+  %1 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %cmp17.i = icmp eq i64 %1, -1
+  br i1 %cmp17.i, label %cond.false156.i, label %cond.true138.i
+
+cond.true138.i:                                   ; preds = %for.end.i, %land.end.thread.i
+  call void @__memset_chk() nounwind
+  br label %cond.end166.i
+
+cond.false156.i:                                  ; preds = %for.end.i, %land.end.thread.i
+  %idxprom1114.i = phi i64 [ undef, %land.end.thread.i ], [ %idxprom.i, %land.end.i ]
+  call void @__memset_chk() nounwind
+  br label %cond.end166.i
+
+cond.end166.i:                                    ; preds = %cond.false156.i, %cond.true138.i
+  %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ]
+  %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8, !tbaa !0
+  %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
+  store i32 0, i32* %att.i, align 4, !tbaa !3
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
new file mode 100644
index 0000000..c595bba
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.0"
+
+@bit_count = external constant [256 x i32], align 16
+
+define fastcc void @unate_intersect() nounwind uwtable ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc.i
+  br label %do.body.i
+
+do.body.i:                                        ; preds = %do.body.i, %for.body
+  %exitcond149 = icmp eq i64 undef, undef
+  br i1 %exitcond149, label %land.lhs.true, label %do.body.i
+
+land.lhs.true:                                    ; preds = %do.body.i
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.inc.i, %if.then
+  %tmp3524.i = phi i32 [ 0, %land.lhs.true ], [ %tmp351.i, %for.inc.i ]
+  %tmp6.i12 = load i32* undef, align 4
+  br i1 undef, label %for.inc.i, label %if.then.i17
+
+if.then.i17:                                      ; preds = %for.body.i
+  %shr.i14 = lshr i32 %tmp6.i12, 8
+  %and14.i = and i32 %shr.i14, 255
+  %idxprom15.i = zext i32 %and14.i to i64
+  %arrayidx16.i = getelementptr inbounds [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i
+  %tmp17.i15 = load i32* %arrayidx16.i, align 4
+  %add.i = add i32 0, %tmp3524.i
+  %add24.i = add i32 %add.i, %tmp17.i15
+  %add31.i = add i32 %add24.i, 0
+  %add33.i = add i32 %add31.i, 0
+  br label %for.inc.i
+
+for.inc.i:                                        ; preds = %if.then.i17, %for.body.i
+  %tmp351.i = phi i32 [ %add33.i, %if.then.i17 ], [ %tmp3524.i, %for.body.i ]
+  br label %for.body.i
+}
diff --git a/test/CodeGen/X86/2011-05-31-movmsk.ll b/test/CodeGen/X86/2011-05-31-movmsk.ll
new file mode 100644
index 0000000..2b54d5c
--- /dev/null
+++ b/test/CodeGen/X86/2011-05-31-movmsk.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mcpu=core2 < %s | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.6"
+
+%0 = type { double }
+%union.anon = type { float }
+
+define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca double, align 8
+  %__u.i = alloca %0, align 8
+  %0 = bitcast double* %__x.addr.i to i8*
+  %1 = bitcast %0* %__u.i to i8*
+  store double %d1, double* %__x.addr.i, align 8
+  %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+  store double %d1, double* %__f.i, align 8
+  %tmp = bitcast double %d1 to i64
+; CHECK-NOT: shr
+; CHECK: movmskpd
+; CHECK-NEXT: and
+  %tmp1 = lshr i64 %tmp, 63
+  %shr.i = trunc i64 %tmp1 to i32
+  ret i32 %shr.i
+}
+
+define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca double, align 8
+  %__u.i = alloca %0, align 8
+  %add = fadd double %d1, %d2
+  %0 = bitcast double* %__x.addr.i to i8*
+  %1 = bitcast %0* %__u.i to i8*
+  store double %add, double* %__x.addr.i, align 8
+  %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+  store double %add, double* %__f.i, align 8
+  %tmp = bitcast double %add to i64
+; CHECK-NOT: shr
+; CHECK: movmskpd
+; CHECK-NEXT: and
+  %tmp1 = lshr i64 %tmp, 63
+  %shr.i = trunc i64 %tmp1 to i32
+  ret i32 %shr.i
+}
+
+define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca float, align 4
+  %__u.i = alloca %union.anon, align 4
+  %0 = bitcast float* %__x.addr.i to i8*
+  %1 = bitcast %union.anon* %__u.i to i8*
+  store float %f1, float* %__x.addr.i, align 4
+  %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+  store float %f1, float* %__f.i, align 4
+  %2 = bitcast float %f1 to i32
+; CHECK-NOT: shr
+; CHECK: movmskps
+; CHECK-NEXT: and
+  %shr.i = lshr i32 %2, 31
+  ret i32 %shr.i
+}
+
+define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp {
+entry:
+  %__x.addr.i = alloca float, align 4
+  %__u.i = alloca %union.anon, align 4
+  %add = fadd float %f1, %f2
+  %0 = bitcast float* %__x.addr.i to i8*
+  %1 = bitcast %union.anon* %__u.i to i8*
+  store float %add, float* %__x.addr.i, align 4
+  %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+  store float %add, float* %__f.i, align 4
+  %2 = bitcast float %add to i32
+; CHECK-NOT: shr
+; CHECK: movmskps
+; CHECK-NEXT: and
+  %shr.i = lshr i32 %2, 31
+  ret i32 %shr.i
+}
diff --git a/test/CodeGen/X86/2011-06-01-fildll.ll b/test/CodeGen/X86/2011-06-01-fildll.ll
new file mode 100644
index 0000000..3a0b05f
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-01-fildll.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-macosx10.6.6"
+
+define float @f(i64* nocapture %x) nounwind readonly ssp {
+entry:
+; CHECK: movl
+; CHECK-NOT: movl
+  %tmp1 = load i64* %x, align 4
+; CHECK: fildll
+  %conv = sitofp i64 %tmp1 to float
+  %add = fadd float %conv, 1.000000e+00
+  ret float %add
+}
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
new file mode 100644
index 0000000..bf7f583
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+
+define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
+entry:
+  %tmp1 = load i64* %a, align 8
+; Ensure x87 ops are properly chained and their order is preserved.
+; CHECK: fildll
+  %conv = sitofp i64 %tmp1 to float
+; CHECK: fstps
+  store float %conv, float* %f, align 4
+; CHECK: idivl
+  %div = sdiv i32 %x, %y
+  %conv5 = sext i32 %div to i64
+  store i64 %conv5, i64* %b, align 8
+  ret float %conv
+}
+
+define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
+entry:
+; CHECK: movl $0,
+  store i64 0, i64* %b, align 8
+  %mul = mul nsw i32 %y, %x
+  %sub = add nsw i32 %mul, -1
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i64* %a, i64 %idxprom
+  %tmp4 = load i64* %arrayidx, align 8
+; CHECK: fildll
+  %conv = sitofp i64 %tmp4 to float
+  store float %conv, float* %f, align 4
+  ret float %conv
+}
diff --git a/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll b/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll
new file mode 100644
index 0000000..d934148
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=x86-64 < %s
+define i32 @signbitl(x86_fp80 %x) nounwind uwtable readnone {
+entry:
+  %tmp4 = bitcast x86_fp80 %x to i80
+  %tmp4.lobit = lshr i80 %tmp4, 79
+  %tmp = trunc i80 %tmp4.lobit to i32
+  ret i32 %tmp
+}
diff --git a/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
new file mode 100644
index 0000000..08178a3
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -verify-coalescing
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @Quicksort(i32* %a, i32 %l, i32 %r) nounwind ssp {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %do.cond, %entry
+  %l.tr = phi i32 [ %l, %entry ], [ %i.1, %do.cond ]
+  %r.tr = phi i32 [ %r, %entry ], [ %l.tr, %do.cond ]
+  %idxprom12 = sext i32 %r.tr to i64
+  %arrayidx14 = getelementptr inbounds i32* %a, i64 %idxprom12
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %tailrecurse
+  %i.0 = phi i32 [ %l.tr, %tailrecurse ], [ %i.1, %do.cond ]
+  %add7 = add nsw i32 %i.0, 1
+  %cmp = icmp sgt i32 %add7, %r.tr
+  br i1 %cmp, label %do.cond, label %if.then
+
+if.then:                                          ; preds = %do.body
+  store i32 %add7, i32* %arrayidx14, align 4
+  %add16 = add i32 %i.0, 2
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body, %if.then
+  %i.1 = phi i32 [ %add16, %if.then ], [ %add7, %do.body ]
+  %cmp19 = icmp sgt i32 %i.1, %r.tr
+  br i1 %cmp19, label %tailrecurse, label %do.body
+}
diff --git a/test/CodeGen/X86/9601.ll b/test/CodeGen/X86/9601.ll
new file mode 100644
index 0000000..cd65a03
--- /dev/null
+++ b/test/CodeGen/X86/9601.ll
@@ -0,0 +1,12 @@
+; RUN:  llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR9601
+; Previously we'd crash trying to put a 32-bit float into a constraint
+; for a normal 'r' register.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test() {
+entry:
+  %0 = call float asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* undef, float 2.000000e+00) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 7535e07..5068d29 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -12,17 +12,6 @@
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
-
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
 @xsrc = external global [32 x i32]
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index f924ec8..a4abccb 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,9 +4,9 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK: test1:
-; CHECK:	sbbl	%ecx, %ecx
+; CHECK: cmpl %ecx, %eax
 ; CHECK-NOT: addl
-; CHECK: subl	%ecx, %eax
+; CHECK: adcl $0, %eax
   %add4 = add i32 %x, %sum
   %cmp = icmp ult i32 %add4, %x
   %inc = zext i1 %cmp to i32
@@ -18,8 +18,7 @@ entry:
 ; CHECK: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
-; CHECK-NEXT: sbbl
-; CHECK-NEXT: subl
+; CHECK-NEXT: adcl $0
 ; CHECK-NEXT: ret
 define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index b95e5b5..7bf527a 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+
+; Some of these tests depend on -join-physregs to commute instructions.
 
 ; The immediate can be encoded in a smaller way if the
 ; instruction is a sub instead of an add.
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index 640237d..a3dc85f 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding -join-physregs | FileCheck %s
 
 ; PR8365
 ; CHECK: andl	$-64, %edi              # encoding: [0x83,0xe7,0xc0]
diff --git a/test/CodeGen/X86/basic-promote-integers.ll b/test/CodeGen/X86/basic-promote-integers.ll
new file mode 100644
index 0000000..c80f2b0
--- /dev/null
+++ b/test/CodeGen/X86/basic-promote-integers.ll
@@ -0,0 +1,98 @@
+; Test that vectors are scalarized/lowered correctly
+; (with both legalization methods).
+; RUN: llc -march=x86 -promote-elements < %s
+; RUN: llc -march=x86                   < %s
+
+; A simple test to check copyToParts and copyFromParts.
+
+define <4 x i64> @test_param_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <4 x i64> %A
+}
+
+define <2 x i32> @test_param_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <2 x i32> %B
+}
+
+define <4 x i8> @test_param_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   ret <4 x i8> %C
+}
+
+; Simple tests to check arithmetic and vector operations on types which need to
+; be legalized (no loads/stores to/from memory here).
+
+define <4 x i64> @test_arith_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i64> %A, <i64 0, i64 1, i64 3, i64 9>
+   ret <4 x i64> %K
+}
+
+define <2 x i32> @test_arith_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <2 x i32> %B, <i32 0, i32 1>
+   ret <2 x i32> %K
+}
+
+define <4 x i8> @test_arith_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9>
+   ret <4 x i8> %K
+}
+
+define i8 @test_arith_3(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9>
+   %Y = extractelement <4 x i8> %K, i32 1
+   ret i8 %Y
+}
+
+define <4 x i8> @test_arith_4(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C)  {
+   %Y = insertelement <4 x i8> %C, i8 1, i32 0
+   ret <4 x i8> %Y
+}
+
+define <4 x i32> @test_arith_5(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %Y = insertelement <4 x i32> %C, i32 1, i32 0
+   ret <4 x i32> %Y
+}
+
+define <4 x i32> @test_arith_6(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %Y = insertelement <4 x i32> %C, i32 %F, i32 0
+   ret <4 x i32> %Y
+}
+
+define <4 x i64> @test_arith_7(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %W = zext i32 %F to i64
+   %Y = insertelement <4 x i64> %A, i64 %W, i32 0
+   ret <4 x i64> %Y
+}
+
+define i64 @test_arith_8(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = extractelement <2 x i32> %B, i32 1
+   %W = zext i32 %F to i64
+   %T = add i64 %W , 11
+   ret i64 %T
+}
+
+define <4 x i64> @test_arith_9(<4 x i64> %A, <2 x i32> %B, <4 x i16> %C)  {
+   %T = add <4 x i16> %C, %C
+   %F0 = extractelement <4 x i16> %T, i32 0
+   %F1 = extractelement <4 x i16> %T, i32 1
+   %W0 = zext i16 %F0 to i64
+   %W1 = zext i16 %F1 to i64
+   %Y0 = insertelement <4 x i64> %A,  i64 %W0, i32 0
+   %Y1 = insertelement <4 x i64> %Y0, i64 %W1, i32 2
+   ret <4 x i64> %Y1
+}
+
+define <4 x i16> @test_arith_10(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = bitcast <2 x i32> %B to <4 x i16>
+   %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3>
+   ret <4 x i16> %T
+}
+
+
+; Simple tests to check saving/loading from memory
+define <4 x i16> @test_mem_0(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C)  {
+   %F = bitcast <2 x i32> %B to <4 x i16>
+   %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3>
+   ret <4 x i16> %T
+}
+
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll
index d2c30c6..3558376 100644
--- a/test/CodeGen/X86/bool-zext.ll
+++ b/test/CodeGen/X86/bool-zext.ll
@@ -1,8 +1,12 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
 
-; CHECK: @bar1
-; CHECK: movzbl
-; CHECK: callq
+; X64: @bar1
+; X64: movzbl
+; X64: jmp
+; WIN64: @bar1
+; WIN64: movzbl
+; WIN64: callq
 define void @bar1(i1 zeroext %v1) nounwind ssp {
 entry:
   %conv = zext i1 %v1 to i32
@@ -10,9 +14,12 @@ entry:
   ret void
 }
 
-; CHECK: @bar2
-; CHECK-NOT: movzbl
-; CHECK: callq
+; X64: @bar2
+; X64-NOT: movzbl
+; X64: jmp
+; WIN64: @bar2
+; WIN64-NOT: movzbl
+; WIN64: callq
 define void @bar2(i8 zeroext %v1) nounwind ssp {
 entry:
   %conv = zext i8 %v1 to i32
@@ -20,11 +27,16 @@ entry:
   ret void
 }
 
-; CHECK: @bar3
-; CHECK: callq
-; CHECK-NOT: movzbl
-; CHECK-NOT: and
-; CHECK: ret
+; X64: @bar3
+; X64: callq
+; X64-NOT: movzbl
+; X64-NOT: and
+; X64: ret
+; WIN64: @bar3
+; WIN64: callq
+; WIN64-NOT: movzbl
+; WIN64-NOT: and
+; WIN64: ret
 define zeroext i1 @bar3() nounwind ssp {
 entry:
   %call = call i1 @foo2() nounwind
diff --git a/test/CodeGen/X86/byval-align.ll b/test/CodeGen/X86/byval-align.ll
new file mode 100644
index 0000000..c62a181
--- /dev/null
+++ b/test/CodeGen/X86/byval-align.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+%struct.S = type { i32}
+
+@.str = private constant [10 x i8] c"ptr = %p\0A\00", align 1 ; <[10 x i8]*> [#uses=1]
+@.str1 = private constant [8 x i8] c"Failed \00", align 1 ; <[8 x i8]*> [#uses=1]
+@.str2 = private constant [2 x i8] c"0\00", align 1 ; <[2 x i8]*> [#uses=1]
+@.str3 = private constant [7 x i8] c"test.c\00", align 1 ; <[7 x i8]*> [#uses=1]
+@__PRETTY_FUNCTION__.2067 = internal constant [13 x i8] c"aligned_func\00" ; <[13 x i8]*> [#uses=1]
+
+define void @aligned_func(%struct.S* byval align 64 %obj) nounwind {
+entry:
+  %ptr = alloca i8*                               ; <i8**> [#uses=3]
+  %p = alloca i64                                 ; <i64*> [#uses=3]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %obj1 = bitcast %struct.S* %obj to i8*          ; <i8*> [#uses=1]
+  store i8* %obj1, i8** %ptr, align 8
+  %0 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %1 = ptrtoint i8* %0 to i64                     ; <i64> [#uses=1]
+  store i64 %1, i64* %p, align 8
+  %2 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
+  %4 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %5 = and i64 %4, 140737488355264                ; <i64> [#uses=1]
+  %6 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %7 = icmp ne i64 %5, %6                         ; <i1> [#uses=1]
+  br i1 %7, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  call void @__assert_fail(i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8]* @.str3, i64 0, i64 0), i32 18, i8* getelementptr inbounds ([13 x i8]* @__PRETTY_FUNCTION__.2067, i64 0, i64 0)) noreturn nounwind
+  unreachable
+
+bb2:                                              ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %bb2
+  ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
+
+declare i32 @puts(i8*)
+
+declare void @__assert_fail(i8*, i8*, i32, i8*) noreturn nounwind
+
+define void @main() nounwind {
+entry:
+; CHECK: main
+; CHECK: andq    $-64, %rsp
+  %s1 = alloca %struct.S                          ; <%struct.S*> [#uses=4]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <i32*> [#uses=1]
+  store i32 1, i32* %0, align 4
+  call void @aligned_func(%struct.S* byval align 64 %s1) nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 686ed9c..98a26e4 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -9,7 +9,6 @@ entry:
 ; CHECK: main:
 ; CHECK: movl $1, (%esp)
 ; CHECK: leal 16(%esp), %edi
-; CHECK: movl $36, %ecx
 ; CHECK: leal 160(%esp), %esi
 ; CHECK: rep;movsl
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 623ac75..d76fab4 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -31,3 +31,18 @@ entry:
 }
 
 declare i16 @llvm.ctlz.i16(i16) nounwind readnone 
+
+; Don't generate the cmovne when the source is known non-zero (and bsr would
+; not set ZF).
+; rdar://9490949
+
+define i32 @t4(i32 %n) nounwind {
+entry:
+; CHECK: t4:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: ret
+  %or = or i32 %n, 1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+  ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 7306920..6e5c1cf 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s
 ; CHECK-NOT:     mov
 ; CHECK:     paddw
 ; CHECK-NOT:     mov
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
new file mode 100644
index 0000000..bfc96f1
--- /dev/null
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+; Radar 9511391
+
+;CHECK:         .byte   4                       ## DW_AT_const_value
+define i32 @foo() nounwind uwtable readnone optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  ret i32 42, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590080, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 2, i32 12, metadata !7, null}
+!10 = metadata !{i32 3, i32 2, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
new file mode 100644
index 0000000..5a51eb8
--- /dev/null
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s - | FileCheck %s
+target triple = "x86_64-apple-darwin10.0.0"
+
+;CHECK:        ## DW_OP_constu
+;CHECK-NEXT:  .byte	42
+define i32 @foobar() nounwind readonly noinline ssp {
+entry:
+  %call = tail call i32 @bar(), !dbg !11
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  %call2 = tail call i32 @bar(), !dbg !11
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11
+  %add = add nsw i32 %call2, %call, !dbg !12
+  ret i32 %add, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare i32 @bar() nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foobar = !{!6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar}
+!1 = metadata !{i32 524329, metadata !"mu.c", metadata !"/private/tmp", metadata !2}
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0}
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 524544, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5}
+!7 = metadata !{i32 524299, metadata !0, i32 12, i32 52, metadata !1, i32 0}
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 15, i32 12, metadata !7, null}
+!10 = metadata !{i32 23, i32 3, metadata !7, null}
+!11 = metadata !{i32 17, i32 3, metadata !7, null}
+!12 = metadata !{i32 18, i32 3, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index e7d5f92..3a849aa 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
 
 ; Radar 8884898
-; CHECK: file	1 "/Users/manav/one/two/simple.c"
+; CHECK: file	1 "/Users/manav/one/two{{/|\\\\}}simple.c"
 
 declare i32 @printf(i8*, ...) nounwind
 
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index 76b93dd..afe1729 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -6,8 +6,11 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK: Ldebug_loc0:
 ;CHECK-NEXT:	.quad	Lfunc_begin0
 ;CHECK-NEXT:	.quad	L
-;CHECK-NEXT:	.short	1                       ## Loc expr size
+;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}          ## Loc expr size
+;CHECK-NEXT:    .short  Lset
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
new file mode 100644
index 0000000..81303bb
--- /dev/null
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O0 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+
+;CHECK: .loc	1 2 11 prologue_end
+define i32 @foo(i32 %i) nounwind ssp {
+entry:
+  %i.addr = alloca i32, align 4
+  %j = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8
+  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11
+  store i32 2, i32* %j, align 4, !dbg !12
+  %tmp = load i32* %j, align 4, !dbg !13
+  %inc = add nsw i32 %tmp, 1, !dbg !13
+  store i32 %inc, i32* %j, align 4, !dbg !13
+  %tmp1 = load i32* %j, align 4, !dbg !14
+  %tmp2 = load i32* %i.addr, align 4, !dbg !14
+  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
+  store i32 %add, i32* %j, align 4, !dbg !14
+  %tmp3 = load i32* %j, align 4, !dbg !15
+  ret i32 %tmp3, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @foo(i32 21), !dbg !16
+  ret i32 %call, !dbg !16
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.sp = !{!1, !6}
+
+!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 13, metadata !1, null}
+!9 = metadata !{i32 590080, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 589835, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 2, i32 6, metadata !10, null}
+!12 = metadata !{i32 2, i32 11, metadata !10, null}
+!13 = metadata !{i32 3, i32 2, metadata !10, null}
+!14 = metadata !{i32 4, i32 2, metadata !10, null}
+!15 = metadata !{i32 5, i32 2, metadata !10, null}
+!16 = metadata !{i32 8, i32 2, metadata !17, null}
+!17 = metadata !{i32 589835, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
new file mode 100644
index 0000000..b115bf4
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9817
+
+
+declare  <4 x i32> @__amdil_get_global_id_int()
+declare  void @llvm.dbg.value(metadata , i64 , metadata )
+define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind {
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata
+!7), !dbg !8
+  %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0
+  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
+  %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
+  %tmp3 = add i32 0, %tmp2, !dbg !15
+; CHECK:  ##DEBUG_VALUE: idx <- EAX+0
+  call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
+!15
+  %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
+  ret void, !dbg !17
+}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata
+!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata
+!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 42, metadata !0, null}
+!9 = metadata !{i32 590080, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 3, i32 41, metadata !10, null}
+!12 = metadata !{i32 0}
+!13 = metadata !{i32 590080, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 4, i32 20, metadata !10, null}
+!15 = metadata !{i32 5, i32 15, metadata !10, null}
+!16 = metadata !{i32 6, i32 18, metadata !10, null}
+!17 = metadata !{i32 7, i32 1, metadata !0, null}
+
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
new file mode 100644
index 0000000..d1a9e57
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9879
+
+; CHECK: ##DEBUG_VALUE: tid <-
+%0 = type { i8*, i8*, i8*, i8*, i32 }
+
+@sgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@fgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@lvgv = internal constant [0 x i8*] zeroinitializer
+@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* bitcast ([1 x i8] addrspace(2)* @sgv to i8*), i8* bitcast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata"
+
+define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind {
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9
+  %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0
+  br label %2
+
+; <label>:2                                       ; preds = %entry
+  %3 = phi i32 [ %1, %entry ]
+  br label %4
+
+; <label>:4                                       ; preds = %2
+  %5 = phi i32 [ %3, %2 ]
+  br label %get_local_id.exit
+
+get_local_id.exit:                                ; preds = %4
+  %6 = phi i32 [ %5, %4 ]
+  call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12
+  %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind
+  %8 = extractelement <4 x i32> %7, i32 0
+  br label %9
+
+; <label>:9                                       ; preds = %get_local_id.exit
+  %10 = phi i32 [ %8, %get_local_id.exit ]
+  br label %11
+
+; <label>:11                                      ; preds = %9
+  %12 = phi i32 [ %10, %9 ]
+  br label %get_global_id.exit
+
+get_global_id.exit:                               ; preds = %11
+  %13 = phi i32 [ %12, %11 ]
+  call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14
+  %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind
+  %15 = extractelement <4 x i32> %14, i32 0
+  br label %16
+
+; <label>:16                                      ; preds = %get_global_id.exit
+  %17 = phi i32 [ %15, %get_global_id.exit ]
+  br label %18
+
+; <label>:18                                      ; preds = %16
+  %19 = phi i32 [ %17, %16 ]
+  br label %get_local_size.exit
+
+get_local_size.exit:                              ; preds = %18
+  %20 = phi i32 [ %19, %18 ]
+  call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16
+  %tmp5 = add i32 %6, %13, !dbg !17
+  %tmp7 = add i32 %tmp5, %20, !dbg !17
+  store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17
+  br label %return, !dbg !17
+
+return:                                           ; preds = %get_local_size.exit
+  ret void, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare <4 x i32> @__amdil_get_local_size_int() nounwind
+
+declare <4 x i32> @__amdil_get_local_id_int() nounwind
+
+declare <4 x i32> @__amdil_get_global_id_int() nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 1, i32 32, metadata !0, null}
+!10 = metadata !{i32 590080, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 5, i32 24, metadata !11, null}
+!13 = metadata !{i32 590080, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 6, i32 25, metadata !11, null}
+!15 = metadata !{i32 590080, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 7, i32 26, metadata !11, null}
+!17 = metadata !{i32 9, i32 24, metadata !11, null}
+!18 = metadata !{i32 10, i32 1, metadata !0, null}
+
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index da49f2d..28d873b 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic -join-physregs < %s | FileCheck %s
 
 %struct.a = type { i32 }
 
@@ -53,7 +53,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ;CHECK:Ldebug_loc0:
 ;CHECK-NEXT:	.quad
 ;CHECK-NEXT:	.quad	[[CLOBBER]]
-;CHECK-NEXT:	.short	1
+;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}
+;CHECK-NEXT:    .short  Lset
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85
+;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/div8.ll b/test/CodeGen/X86/div8.ll
new file mode 100644
index 0000000..0825f79
--- /dev/null
+++ b/test/CodeGen/X86/div8.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '8div.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.6"
+
+define signext i8 @test_div(i8 %dividend, i8 %divisor) nounwind ssp {
+entry:
+  %dividend.addr = alloca i8, align 2
+  %divisor.addr = alloca i8, align 1
+  %quotient = alloca i8, align 1
+  store i8 %dividend, i8* %dividend.addr, align 2
+  store i8 %divisor, i8* %divisor.addr, align 1
+  %tmp = load i8* %dividend.addr, align 2
+  %tmp1 = load i8* %divisor.addr, align 1
+; Insist on i8->i32 zero extension, even though divb demands only i16:
+; CHECK: movzbl {{.*}}%eax
+; CHECK: divb
+  %div = udiv i8 %tmp, %tmp1
+  store i8 %div, i8* %quotient, align 1
+  %tmp4 = load i8* %quotient, align 1
+  ret i8 %tmp4
+}
diff --git a/test/CodeGen/X86/eh_frame.ll b/test/CodeGen/X86/eh_frame.ll
new file mode 100644
index 0000000..3b792b2
--- /dev/null
+++ b/test/CodeGen/X86/eh_frame.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu | FileCheck -check-prefix=STATIC %s
+; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=PIC %s
+
+@__FRAME_END__ = constant [1 x i32] zeroinitializer, section ".eh_frame"
+
+@foo = external global i32
+@bar1 = constant i8* bitcast (i32* @foo to i8*), section "my_bar1", align 8
+
+
+; STATIC: .section	.eh_frame,"a",@progbits
+; STATIC: .section	my_bar1,"a",@progbits
+
+; PIC:	.section	.eh_frame,"a",@progbits
+; PIC:	.section	my_bar1,"aw",@progbits
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index b303cd1..874c53a 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,10 +6,24 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NOT: movq %rsp, %rbp
+; CHECK-NO-FP-NEXT: :
+; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
+; CHECK-NO-FP-NEXT: :
+; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP:      movq %rsp, %rbp
-; CHECK-FP-NEXT: Ltmp1:
-; CHECK-FP:      nop
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_startproc
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: pushq %rbp
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_offset %rbp, -16
+; CHECK-FP-NEXT: movq %rsp, %rbp
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FP-NEXT: nop
+; CHECK-FP-NEXT: :
+; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/fast-isel-agg-constant.ll b/test/CodeGen/X86/fast-isel-agg-constant.ll
new file mode 100644
index 0000000..ce0dff7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-agg-constant.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s
+; Make sure fast-isel doesn't screw up aggregate constants.
+; (Failing out is okay, as long as we don't miscompile.)
+
+%bar = type { i32 }
+
+define i32 @foo()  {
+  %tmp = extractvalue %bar { i32 3 }, 0
+  ret i32 %tmp
+; CHECK: movl $3, %eax
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 5fcdbbb..2fbe4e2 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -fast-isel -march=x86 | grep and
+; RUN: llc < %s -O0 -fast-isel-abort -march=x86 | FileCheck %s
 
-define i32 @t() nounwind {
+%struct.s = type {i32, i32, i32}
+
+define i32 @test1() nounwind {
 tak:
 	%tmp = call i1 @foo()
 	br i1 %tmp, label %BB1, label %BB2
@@ -8,6 +10,46 @@ BB1:
 	ret i32 1
 BB2:
 	ret i32 0
+; CHECK: test1:
+; CHECK: calll
+; CHECK-NEXT: testb	$1
 }
-
 declare i1 @foo() zeroext nounwind
+
+declare void @foo2(%struct.s* byval)
+
+define void @test2(%struct.s* %d) nounwind {
+  call void @foo2(%struct.s* %d byval)
+  ret void
+; CHECK: test2:
+; CHECK: movl	(%eax)
+; CHECK: movl {{.*}}, (%esp)
+; CHECK: movl	4(%eax)
+; CHECK: movl {{.*}}, 4(%esp)
+; CHECK: movl	8(%eax)
+; CHECK: movl {{.*}}, 8(%esp)
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @test3(i8* %a) {
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test3:
+; CHECK:   movl	{{.*}}, (%esp)
+; CHECK:   movl	$0, 4(%esp)
+; CHECK:   movl	$100, 8(%esp)
+; CHECK:   calll {{.*}}memset
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @test4(i8* %a, i8* %b) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test4:
+; CHECK:   movl	{{.*}}, (%esp)
+; CHECK:   movl	{{.*}}, 4(%esp)
+; CHECK:   movl	$100, 8(%esp)
+; CHECK:   calll {{.*}}memcpy
+}
diff --git a/test/CodeGen/X86/fast-isel-extract.ll b/test/CodeGen/X86/fast-isel-extract.ll
new file mode 100644
index 0000000..f63396e
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-extract.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort | FileCheck %s
+
+%struct.x = type { i64, i64 }
+%addovf = type { i32, i1 }
+declare %struct.x @f()
+
+define void @test1(i64*) nounwind ssp {
+  %2 = tail call %struct.x @f() nounwind
+  %3 = extractvalue %struct.x %2, 0
+  %4 = add i64 %3, 10
+  store i64 %4, i64* %0
+  ret void
+; CHECK: test1:
+; CHECK: callq _f
+; CHECK-NEXT: addq	$10, %rax
+}
+
+define void @test2(i64*) nounwind ssp {
+  %2 = tail call %struct.x @f() nounwind
+  %3 = extractvalue %struct.x %2, 1
+  %4 = add i64 %3, 10
+  store i64 %4, i64* %0
+  ret void
+; CHECK: test2:
+; CHECK: callq _f
+; CHECK-NEXT: addq	$10, %rdx
+}
+
+declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+
+define void @test3(i32 %x, i32 %y, i32* %z) {
+  %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %sum = extractvalue %addovf %r, 0
+  %sum3 = mul i32 %sum, 3
+  %bit = extractvalue %addovf %r, 1
+  br i1 %bit, label %then, label %end
+  
+then:
+  store i32 %sum3, i32* %z
+  br label %end
+
+end:
+  ret void
+; CHECK: test3:
+; CHECK: addl
+; CHECK: seto %al
+; CHECK: testb $1, %al
+}
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index 5ffd48b..f42a4a2 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
 ; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
 
 ; CHECK: doo:
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index 5d572c1..bea18a1 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,14 +1,15 @@
-; RUN: llc < %s -march=x86 -fast-isel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
 
-declare i64 @test1a(i64)
+declare i32 @test1a(i32)
 
-define i32 @test1(i64 %x) nounwind {
+define i32 @test1(i32 %x) nounwind {
 ; CHECK: test1:
 ; CHECK: andb $1, %
-	%y = add i64 %x, -3
-	%t = call i64 @test1a(i64 %y)
-	%s = mul i64 %t, 77
-	%z = trunc i64 %s to i1
+	%y = add i32 %x, -3
+	%t = call i32 @test1a(i32 %y)
+	%s = mul i32 %t, 77
+	%z = trunc i32 %s to i1
 	br label %next
 
 next:		; preds = %0
diff --git a/test/CodeGen/X86/fast-isel-ret-ext.ll b/test/CodeGen/X86/fast-isel-ret-ext.ll
new file mode 100644
index 0000000..fd768cb
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-ret-ext.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s  -O0 -fast-isel-abort -mtriple i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s  -O0 -fast-isel-abort -mtriple x86_64-apple-darwin10 | FileCheck %s
+
+define zeroext i8 @test1(i32 %y) nounwind {
+  %conv = trunc i32 %y to i8
+  ret i8 %conv
+  ; CHECK: test1:
+  ; CHECK: movzbl {{.*}}, %eax
+}
+
+define signext i8 @test2(i32 %y) nounwind {
+  %conv = trunc i32 %y to i8
+  ret i8 %conv
+  ; CHECK: test2:
+  ; CHECK: movsbl {{.*}}, %eax
+}
+
+define zeroext i16 @test3(i32 %y) nounwind {
+  %conv = trunc i32 %y to i16
+  ret i16 %conv
+  ; CHECK: test3:
+  ; CHECK: movzwl {{.*}}, %eax
+}
+
+define signext i16 @test4(i32 %y) nounwind {
+  %conv = trunc i32 %y to i16
+  ret i16 %conv
+  ; CHECK: test4:
+  ; CHECK: movswl {{.*}}, %eax
+}
+
+define zeroext i1 @test5(i32 %y) nounwind {
+  %conv = trunc i32 %y to i1
+  ret i1 %conv
+  ; CHECK: test5:
+  ; CHECK: andb $1
+  ; CHECK: movzbl {{.*}}, %eax
+}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 5a1d213..8391860 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10
 
 ; This tests very minimal fast-isel functionality.
 
@@ -27,7 +27,7 @@ exit:
   ret i32* %t8
 }
 
-define double @bar(double* %p, double* %q) nounwind {
+define void @bar(double* %p, double* %q) nounwind {
 entry:
   %r = load double* %p
   %s = load double* %q
@@ -41,7 +41,8 @@ fast:
   br label %exit
 
 exit:
-  ret double %t3
+  store double %t3, double* %q
+  ret void
 }
 
 define i32 @cast() nounwind {
@@ -68,24 +69,28 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
   ret i8* %t
 }
 
-define i8 @trunc_i32_i8(i32 %x) signext nounwind  {
+define void @trunc_i32_i8(i32 %x, i8* %p) nounwind  {
 	%tmp1 = trunc i32 %x to i8
-	ret i8 %tmp1
+	store i8 %tmp1, i8* %p
+	ret void
 }
 
-define i8 @trunc_i16_i8(i16 signext %x) signext nounwind  {
+define void @trunc_i16_i8(i16 signext %x, i8* %p) nounwind  {
 	%tmp1 = trunc i16 %x to i8
-	ret i8 %tmp1
+	store i8 %tmp1, i8* %p
+	ret void
 }
 
-define i8 @shl_i8(i8 %a, i8 %c) nounwind {
-       %tmp = shl i8 %a, %c
-       ret i8 %tmp
+define void @shl_i8(i8 %a, i8 %c, i8* %p) nounwind {
+  %tmp = shl i8 %a, %c
+  store i8 %tmp, i8* %p
+  ret void
 }
 
-define i8 @mul_i8(i8 %a) nounwind {
-       %tmp = mul i8 %a, 17
-       ret i8 %tmp
+define void @mul_i8(i8 %a, i8* %p) nounwind {
+  %tmp = mul i8 %a, 17
+  store i8 %tmp, i8* %p
+  ret void
 }
 
 define void @load_store_i1(i1* %p, i1* %q) nounwind {
diff --git a/test/CodeGen/X86/fold-xmm-zero.ll b/test/CodeGen/X86/fold-xmm-zero.ll
new file mode 100644
index 0000000..b4eeb40
--- /dev/null
+++ b/test/CodeGen/X86/fold-xmm-zero.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx10.6.7 -mattr=+sse2 | FileCheck %s
+
+; Simple test to make sure folding for special constants (like float zero)
+; isn't completely broken.
+
+; CHECK: divss	LCPI0
+
+%0 = type { float, float, float, float, float, float, float, float }
+
+define void @f() nounwind ssp {
+entry:
+  %0 = tail call %0 asm sideeffect "foo", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00) nounwind
+  %asmresult = extractvalue %0 %0, 0
+  %asmresult8 = extractvalue %0 %0, 1
+  %asmresult9 = extractvalue %0 %0, 2
+  %asmresult10 = extractvalue %0 %0, 3
+  %asmresult11 = extractvalue %0 %0, 4
+  %asmresult12 = extractvalue %0 %0, 5
+  %asmresult13 = extractvalue %0 %0, 6
+  %asmresult14 = extractvalue %0 %0, 7
+  %div = fdiv float %asmresult, 0.000000e+00
+  %1 = tail call %0 asm sideeffect "bar", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div, float %asmresult8, float %asmresult9, float %asmresult10, float %asmresult11, float %asmresult12, float %asmresult13, float %asmresult14) nounwind
+  %asmresult24 = extractvalue %0 %1, 0
+  %asmresult25 = extractvalue %0 %1, 1
+  %asmresult26 = extractvalue %0 %1, 2
+  %asmresult27 = extractvalue %0 %1, 3
+  %asmresult28 = extractvalue %0 %1, 4
+  %asmresult29 = extractvalue %0 %1, 5
+  %asmresult30 = extractvalue %0 %1, 6
+  %asmresult31 = extractvalue %0 %1, 7
+  %div33 = fdiv float %asmresult24, 0.000000e+00
+  %2 = tail call %0 asm sideeffect "baz", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div33, float %asmresult25, float %asmresult26, float %asmresult27, float %asmresult28, float %asmresult29, float %asmresult30, float %asmresult31) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/hidden-vis-pic.ll b/test/CodeGen/X86/hidden-vis-pic.ll
index 217dba6..67be3d0 100644
--- a/test/CodeGen/X86/hidden-vis-pic.ll
+++ b/test/CodeGen/X86/hidden-vis-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim | FileCheck %s
 
 
 
@@ -26,7 +26,7 @@ entry:
 
 @.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
 
-define hidden void @func() nounwind ssp {
+define hidden void @func() nounwind ssp uwtable {
 entry:
   %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
   br label %return
@@ -37,7 +37,7 @@ return:                                           ; preds = %entry
 
 declare i32 @puts(i8*)
 
-define hidden i32 @main() nounwind ssp {
+define hidden i32 @main() nounwind ssp uwtable {
 entry:
   %retval = alloca i32                            ; <i32*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
new file mode 100644
index 0000000..72e17c0
--- /dev/null
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx  | FileCheck %s
+
+; Common "xorb al, al" instruction in the two successor blocks should be
+; moved to the entry block above the test + je.
+
+; rdar://9145558
+
+define zeroext i1 @t(i32 %c) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: xorb %al, %al
+; CHECK: test
+; CHECK: je
+  %tobool = icmp eq i32 %c, 0
+  br i1 %tobool, label %return, label %if.then
+
+if.then:
+; CHECK: callq
+  %call = tail call zeroext i1 (...)* @foo() nounwind
+  br label %return
+
+return:
+; CHECK: ret
+  %retval.0 = phi i1 [ %call, %if.then ], [ false, %entry ]
+  ret i1 %retval.0
+}
+
+declare zeroext i1 @foo(...)
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
new file mode 100644
index 0000000..29c5ae5
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -0,0 +1,17 @@
+; RUN: not llc -march x86 -regalloc=fast       < %s 2> %t1
+; RUN: not llc -march x86 -regalloc=basic      < %s 2> %t2
+; RUN: not llc -march x86 -regalloc=greedy     < %s 2> %t3
+; RUN: FileCheck %s < %t1
+; RUN: FileCheck %s < %t2
+; RUN: FileCheck %s < %t3
+
+; The register allocator must fail on this function, and it should print the
+; inline asm in the diagnostic.
+; CHECK: LLVM ERROR: Ran out of registers during register allocation!
+; CHECK: INLINEASM <es:hello world>
+
+define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp {
+entry:
+  tail call void asm sideeffect "hello world", "r,r,r,r,r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 507a328..4a98e63 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: not grep cmp %t
-; RUN: not grep xor %t
-; RUN: grep jne %t | count 1
-; RUN: grep jp %t | count 1
-; RUN: grep setnp %t | count 1
-; RUN: grep sete %t | count 1
-; RUN: grep and %t | count 1
-; RUN: grep cvt %t | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define i32 @isint_return(double %d) nounwind {
+; CHECK-NOT: xor
+; CHECK: cvt
   %i = fptosi double %d to i32
+; CHECK-NEXT: cvt
   %e = sitofp i32 %i to double
+; CHECK: cmpeqsd
   %c = fcmp oeq double %d, %e
+; CHECK-NEXT: movd
+; CHECK-NEXT: andl
   %z = zext i1 %c to i32
   ret i32 %z
 }
@@ -19,9 +17,14 @@ define i32 @isint_return(double %d) nounwind {
 declare void @foo()
 
 define void @isint_branch(double %d) nounwind {
+; CHECK: cvt
   %i = fptosi double %d to i32
+; CHECK-NEXT: cvt
   %e = sitofp i32 %i to double
+; CHECK: ucomisd
   %c = fcmp oeq double %d, %e
+; CHECK-NEXT: jne
+; CHECK-NEXT: jp
   br i1 %c, label %true, label %false
 true:
   call void @foo()
diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll
index 5bc4f7e..09c1c07 100644
--- a/test/CodeGen/X86/lsr-overflow.ll
+++ b/test/CodeGen/X86/lsr-overflow.ll
@@ -25,3 +25,21 @@ __ABContainsLabel.exit:
   %cmp = icmp eq i64 %indvar, 9223372036854775807
   ret i1 %cmp
 }
+
+define void @func_37() noreturn nounwind readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.inc8 ]
+  %sub.i = add i64 undef, %indvar
+  %cmp.i = icmp eq i64 %sub.i, -9223372036854775808
+  br i1 undef, label %for.inc8, label %for.cond4
+
+for.cond4:                                        ; preds = %for.cond4, %for.body
+  br label %for.cond4
+
+for.inc8:                                         ; preds = %for.body
+  %indvar.next = add i64 %indvar, 1
+  br label %for.body
+}
diff --git a/test/CodeGen/X86/movntdq-no-avx.ll b/test/CodeGen/X86/movntdq-no-avx.ll
new file mode 100644
index 0000000..8b7e6ef
--- /dev/null
+++ b/test/CodeGen/X86/movntdq-no-avx.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; Test that we produce a movntdq, not a vmovntdq
+; CHECK-NOT: vmovntdq
+
+define void @test(<2 x i64>* nocapture %a, <2 x i64> %b) nounwind optsize {
+entry:
+  store <2 x i64> %b, <2 x i64>* %a, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
new file mode 100644
index 0000000..1d09535
--- /dev/null
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
+; CHECK: movntps
+  %cast = bitcast i8* %B to <4 x float>*
+  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+; CHECK: movntdq
+  %cast1 = bitcast i8* %B to <2 x i64>*
+  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: movntpd
+  %cast2 = bitcast i8* %B to <2 x double>*
+  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e35eb70..e42aa9d 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -41,14 +41,13 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;      CHECK:         jle
 ;  CHECK-NOT:         cmov
-;      CHECK:         xorl    {{%edi, %edi|%ecx, %ecx}}
+;      CHECK:         xorl    {{%edi, %edi|%ecx, %ecx|%eax, %eax}}
 ; CHECK-NEXT:         align
 ; CHECK-NEXT: BB1_2:
-; CHECK-NEXT:         callq
+; CHECK:              callq
 ; CHECK-NEXT:         incl    [[BX:%[a-z0-9]+]]
 ; CHECK-NEXT:         cmpl    [[R14:%[a-z0-9]+]], [[BX]]
-; CHECK-NEXT:         movq    %rax, %r{{di|cx}}
-; CHECK-NEXT:         jl
+; CHECK:              jl
 
 define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
 entry:
diff --git a/test/CodeGen/X86/peep-setb.ll b/test/CodeGen/X86/peep-setb.ll
new file mode 100644
index 0000000..0bab789
--- /dev/null
+++ b/test/CodeGen/X86/peep-setb.ll
@@ -0,0 +1,82 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+define i8 @test1(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = zext i1 %cmp to i8
+  %add = add i8 %cond, %b
+  ret i8 %add
+; CHECK: test1:
+; CHECK: adcb $0
+}
+
+define i32 @test2(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  %add = add i32 %cond, %b
+  ret i32 %add
+; CHECK: test2:
+; CHECK: adcl $0
+}
+
+define i64 @test3(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = zext i1 %cmp to i64
+  %add = add i64 %conv, %b
+  ret i64 %add
+; CHECK: test3:
+; CHECK: adcq $0
+}
+
+define i8 @test4(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = zext i1 %cmp to i8
+  %sub = sub i8 %b, %cond
+  ret i8 %sub
+; CHECK: test4:
+; CHECK: sbbb $0
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  %sub = sub i32 %b, %cond
+  ret i32 %sub
+; CHECK: test5:
+; CHECK: sbbl $0
+}
+
+define i64 @test6(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = zext i1 %cmp to i64
+  %sub = sub i64 %b, %conv
+  ret i64 %sub
+; CHECK: test6:
+; CHECK: sbbq $0
+}
+
+define i8 @test7(i8 %a, i8 %b) nounwind {
+  %cmp = icmp ult i8 %a, %b
+  %cond = sext i1 %cmp to i8
+  %sub = sub i8 %b, %cond
+  ret i8 %sub
+; CHECK: test7:
+; CHECK: adcb $0
+}
+
+define i32 @test8(i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %cond = sext i1 %cmp to i32
+  %sub = sub i32 %b, %cond
+  ret i32 %sub
+; CHECK: test8:
+; CHECK: adcl $0
+}
+
+define i64 @test9(i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %conv = sext i1 %cmp to i64
+  %sub = sub i64 %b, %conv
+  ret i64 %sub
+; CHECK: test9:
+; CHECK: adcq $0
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 13e804d..02c519f 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 4
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR2659
 
 define i32 @binomial(i32 %n, i32 %k) nounwind {
@@ -12,7 +12,8 @@ forcond.preheader:		; preds = %entry
 
 ifthen:		; preds = %entry
 	ret i32 0
-
+; CHECK: forbody
+; CHECK-NOT: mov
 forbody:		; preds = %forbody, %forcond.preheader
 	%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]		; <i32> [#uses=3]
 	%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index f23669e..4162015 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -join-physregs | FileCheck %s
 ; rdar://5571034
 
+; This requires physreg joining because %vreg13 is live everywhere:
+; 304L		%CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13
+; 320L		%vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19
+; 336L		%vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
+
 define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
 ; CHECK: foo:
 entry:
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index bf5229a..d8ed4c0 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t
 ; RUN: grep pmul %t | count 12
 ; RUN: grep mov %t | count 11
 
+; The f() arguments in %xmm0 and %xmm1 cause an extra movdqa without -join-physregs.
+
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
         ret <4 x i32> %A
diff --git a/test/CodeGen/X86/pr10068.ll b/test/CodeGen/X86/pr10068.ll
new file mode 100644
index 0000000..8829c5d
--- /dev/null
+++ b/test/CodeGen/X86/pr10068.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86
+
+define void @foobar() {
+entry:
+  %sub.i = trunc i64 undef to i32
+  %shr80.i = ashr i32 %sub.i, 16
+  %add82.i = add nsw i32 %shr80.i, 1
+  %notlhs.i = icmp slt i32 %shr80.i, undef
+  %notrhs.i = icmp sgt i32 %add82.i, -1
+  %or.cond.not.i = and i1 %notrhs.i, %notlhs.i
+  %cmp154.i = icmp slt i32 0, undef
+  %or.cond406.i = and i1 %or.cond.not.i, %cmp154.i
+  %or.cond406.not.i = xor i1 %or.cond406.i, true
+  %or.cond407.i = or i1 undef, %or.cond406.not.i
+  br i1 %or.cond407.i, label %if.then158.i, label %if.end163.i
+
+if.then158.i:                                     ; preds = %entry
+  ret void
+
+if.end163.i:                                      ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index ef0f9ea..5dab5c9 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 -disable-branch-fold | FileCheck %s
 ; PR2659
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -19,7 +18,11 @@ forcond.preheader:              ; preds = %entry
 ; CHECK-NOT: xorl
 ; CHECK-NOT: movl
 ; CHECK-NOT: LBB
-; CHECK: je
+; CHECK: jne
+
+; There should be no moves required in the for loop body.
+; CHECK: %forbody
+; CHECK-NOT: mov
 
 ifthen:         ; preds = %entry
   ret i32 0
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
index 9b251f5..ba92c77 100644
--- a/test/CodeGen/X86/pr9127.ll
+++ b/test/CodeGen/X86/pr9127.ll
@@ -10,4 +10,4 @@ entry:
 }
 
 ; test that the load is folded.
-; CHECK: ucomisd	(%{{rdi|rdx}}), %xmm0
+; CHECK: cmpeqsd	(%{{rdi|rdx}}), %xmm0
diff --git a/test/CodeGen/X86/pr9743.ll b/test/CodeGen/X86/pr9743.ll
index 8feccd9..6597c23 100644
--- a/test/CodeGen/X86/pr9743.ll
+++ b/test/CodeGen/X86/pr9743.ll
@@ -9,9 +9,9 @@ define void @f() {
 ; CHECK-NEXT: :
 ; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT: :
-; CHECK-NEXT:  .cfi_offset 6, -16
+; CHECK-NEXT:  .cfi_offset %rbp, -16
 ; CHECK-NEXT:  movq    %rsp, %rbp
 ; CHECK-NEXT: :
-; CHECK-NEXT:  .cfi_def_cfa_register 6
+; CHECK-NEXT:  .cfi_def_cfa_register %rbp
 ; CHECK-NEXT:  popq    %rbp
 ; CHECK-NEXT:  ret
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 04b57dd..865e147 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s
 ; rdar://6602459
 
 @g_v1di = external global <1 x i64>
@@ -8,19 +8,32 @@ entry:
 	%call = call <1 x i64> @return_v1di()		; <<1 x i64>> [#uses=0]
 	store <1 x i64> %call, <1 x i64>* @g_v1di
         ret void
+; CHECK: t1:
+; CHECK: callq
+; CHECK-NEXT: movq	_g_v1di
+; CHECK-NEXT: movq	%rax,
 }
 
 declare <1 x i64> @return_v1di()
 
 define <1 x i64> @t2() nounwind {
 	ret <1 x i64> <i64 1>
+; CHECK: t2:
+; CHECK: movl	$1
+; CHECK-NEXT: ret
 }
 
 define <2 x i32> @t3() nounwind {
 	ret <2 x i32> <i32 1, i32 0>
+; CHECK: t3:
+; CHECK: movl $1
+; CHECK: movd {{.*}}, %xmm0
 }
 
 define double @t4() nounwind {
 	ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
+; CHECK: t4:
+; CHECK: movl $1
+; CHECK: movd {{.*}}, %xmm0
 }
 
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
index 4a9c1ba..aa2f0af 100644
--- a/test/CodeGen/X86/setoeq.ll
+++ b/test/CodeGen/X86/setoeq.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86  | grep set | count 2
-; RUN: llc < %s -march=x86  | grep and
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define zeroext i8 @t(double %x) nounwind readnone {
 entry:
@@ -7,5 +6,16 @@ entry:
 	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
 	%2 = fcmp oeq double %1, %x		; <i1> [#uses=1]
 	%retval12 = zext i1 %2 to i8		; <i8> [#uses=1]
+; CHECK: cmpeqsd
+	ret i8 %retval12
+}
+
+define zeroext i8 @u(double %x) nounwind readnone {
+entry:
+	%0 = fptosi double %x to i32		; <i32> [#uses=1]
+	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = fcmp une double %1, %x		; <i1> [#uses=1]
+	%retval12 = zext i1 %2 to i8		; <i8> [#uses=1]
+; CHECK: cmpneqsd
 	ret i8 %retval12
 }
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll
new file mode 100644
index 0000000..24ba1fc
--- /dev/null
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define i64 @test(i64 %A) {
+; CHECK: @test
+; CHECK: shrq $54
+; CHECK: andq $1020
+; CHECK: ret
+    %B = lshr i64 %A, 56
+    %C = shl i64 %B, 2
+    ret i64 %C
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index de2a81e8..4a98efb 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -198,7 +198,7 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
 
 ; rdar://r7717598
 %struct.ns = type { i32, i32 }
-%struct.cp = type { float, float }
+%struct.cp = type { float, float, float, float, float }
 
 define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
 ; 32: t13:
@@ -229,7 +229,7 @@ entry:
 ; 64: t14:
 ; 64: movq 32(%rdi)
 ; 64-NOT: movq 16(%rdi)
-; 64: jmpq *16(%rdi)
+; 64: jmpq *16({{%rdi|%rax}})
   %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
   %1 = load void ()** %0, align 8                 ; <void ()*> [#uses=2]
   %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1]
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
deleted file mode 100644
index 7c23adb..0000000
--- a/test/CodeGen/X86/smul-with-overflow-2.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 | grep mul | count 1
-; RUN: llc < %s -march=x86 | grep add | count 3
-
-define i32 @t1(i32 %a, i32 %b) nounwind readnone {
-entry:
-        %tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
-}
-
-define i32 @t2(i32 %a, i32 %b) nounwind readnone {
-entry:
-        %tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
-}
-
-declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
deleted file mode 100644
index 49c31f5..0000000
--- a/test/CodeGen/X86/smul-with-overflow-3.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep {jno} | count 1
-
-@ok = internal constant [4 x i8] c"%d\0A\00"
-@no = internal constant [4 x i8] c"no\0A\00"
-
-define i1 @func1(i32 %v1, i32 %v2) nounwind {
-entry:
-  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
-  %sum = extractvalue {i32, i1} %t, 0
-  %obit = extractvalue {i32, i1} %t, 1
-  br i1 %obit, label %overflow, label %normal
-
-overflow:
-  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
-  ret i1 false
-
-normal:
-  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
-  ret i1 true
-}
-
-declare i32 @printf(i8*, ...) nounwind
-declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 6d125e4..7c2e247 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
 
-define i1 @func1(i32 %v1, i32 %v2) nounwind {
+define i1 @test1(i32 %v1, i32 %v2) nounwind {
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %sum = extractvalue {i32, i1} %t, 0
@@ -17,7 +17,53 @@ normal:
 overflow:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
+; CHECK: test1:
+; CHECK: imull
+; CHECK-NEXT: jo
+}
+
+define i1 @test2(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+; CHECK: test2:
+; CHECK: imull
+; CHECK-NEXT: jno
 }
 
 declare i32 @printf(i8*, ...) nounwind
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test3:
+; CHECK: addl
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test4:
+; CHECK: addl
+; CHECK: mull
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 348121a..ff0af25 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
 
 ; Some of these patterns can be matched as SSE min or max. Some of
 ; then can be matched provided that the operands are swapped.
@@ -12,6 +12,9 @@
 ; y_ : use -0.0 instead of %y
 ; _inverse : swap the arms of the select.
 
+; Some of these tests depend on -join-physregs commuting instructions to
+; eliminate copies.
+
 ; CHECK:      ogt:
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8e72f13..8c2e58d 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -62,11 +62,10 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
 	ret <8 x i16> %tmp
 ; X64: t4:
-; X64: 	pextrw	$7, %xmm0, %eax
-; X64: 	pshufhw	$100, %xmm0, %xmm1
-; X64: 	pinsrw	$1, %eax, %xmm1
-; X64: 	pextrw	$1, %xmm0, %eax
-; X64: 	movdqa	%xmm1, %xmm0
+; X64: 	pextrw	$7, [[XMM0:%xmm[0-9]+]], %eax
+; X64: 	pshufhw	$100, [[XMM0]], [[XMM1:%xmm[0-9]+]]
+; X64: 	pinsrw	$1, %eax, [[XMM1]]
+; X64: 	pextrw	$1, [[XMM0]], %eax
 ; X64: 	pinsrw	$4, %eax, %xmm0
 ; X64: 	ret
 }
@@ -251,13 +250,13 @@ entry:
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		pinsrw	$0, %eax, %xmm1
-; X64: 		pextrw	$8, %xmm0, %eax
-; X64: 		pinsrw	$1, %eax, %xmm1
-; X64: 		pextrw	$1, %xmm1, %ecx
-; X64: 		movd	%xmm1, %edx
-; X64: 		pinsrw	$0, %edx, %xmm1
-; X64: 		pinsrw	$1, %eax, %xmm0
+; X64: 		pinsrw	$0, %eax, [[X1:%xmm[0-9]+]]
+; X64: 		pextrw	$8, [[X0:%xmm[0-9]+]], %eax
+; X64: 		pinsrw	$1, %eax, [[X1]]
+; X64: 		pextrw	$1, [[X1]], %ecx
+; X64: 		movd	[[X1]], %edx
+; X64: 		pinsrw	$0, %edx, %xmm
+; X64: 		pinsrw	$1, %eax, %xmm
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
index 1723909..c787523 100644
--- a/test/CodeGen/X86/sse42.ll
+++ b/test/CodeGen/X86/sse42.ll
@@ -1,38 +1,39 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
 
-declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
-declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
-declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
 
-define i32 @crc32_8(i32 %a, i8 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
-; X32: _crc32_8:
+; X32: _crc32_32_8:
 ; X32:     crc32b   8(%esp), %eax
 
-; X64: _crc32_8:
-; X64:     crc32b   %sil, %eax
+; X64: _crc32_32_8:
+; X64:     crc32b   %sil,
 }
 
 
-define i32 @crc32_16(i32 %a, i16 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
-; X32: _crc32_16:
+; X32: _crc32_32_16:
 ; X32:     crc32w   8(%esp), %eax
 
-; X64: _crc32_16:
-; X64:     crc32w   %si, %eax
+; X64: _crc32_32_16:
+; X64:     crc32w   %si,
 }
 
 
-define i32 @crc32_32(i32 %a, i32 %b) nounwind {
-  %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
-; X32: _crc32_32:
+; X32: _crc32_32_32:
 ; X32:     crc32l   8(%esp), %eax
 
-; X64: _crc32_32:
-; X64:     crc32l   %esi, %eax
+; X64: _crc32_32_32:
+; X64:     crc32l   %esi,
 }
+
diff --git a/test/CodeGen/X86/sse42_64.ll b/test/CodeGen/X86/sse42_64.ll
new file mode 100644
index 0000000..8b3a69b
--- /dev/null
+++ b/test/CodeGen/X86/sse42_64.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+  ret i64 %tmp
+
+; X64: _crc32_64_8:
+; X64:     crc32b   %sil,
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+  ret i64 %tmp
+
+; X64: _crc32_64_64:
+; X64:     crc32q   %rsi,
+}
+
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 02399c4..a57fa58 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s
 ; CHECK: fail
 ; CHECK-NOT: fail
 
@@ -117,7 +117,16 @@ define <2 x double> @d8(<2 x double> %f) {
   ret <2 x double> %t
 }
 
-; This one should fail to fuse.
+; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead
+; it produces:
+;   callq	test_vd
+;   movapd	(%rsp), %xmm1           # 16-byte Reload
+;   hsubpd	%xmm0, %xmm1
+;   movapd	%xmm1, %xmm0
+;   addq	$24, %rsp
+;   ret
+; RABasic still tries to fold this one.
+
 define <2 x double> @z0(<2 x double> %f) {
   %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 77710ad..d6c16ca 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -412,9 +412,9 @@ return:
 ; can fall-through into the ret and the other side has to branch anyway.
 
 ; CHECK: TESTE:
-; CHECK: imulq
-; CHECK-NEXT: LBB8_2:
-; CHECK-NEXT: ret
+; CHECK: ret
+; CHECK-NOT: ret
+; CHECK: size TESTE
 
 define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 060ce0f..c18c7aa 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
-; CHECK: subq  ${{24|72}}, %rsp
+; CHECK: subq  ${{24|72|80}}, %rsp
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index c997661..84fcbc7 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -12,3 +12,27 @@ define i1 @a(i32 %x) zeroext nounwind {
 ; CHECK: movzbl	%al, %eax
 ; CHECK: ret
 }
+
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test2:
+; CHECK: addl
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+; CHECK: test3:
+; CHECK: addl
+; CHECK: mull
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index 8fbbd39..a0448ec 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -7,10 +7,10 @@
 ; Use the flags on the add.
 
 ; CHECK: test1:
-;      CHECK: addl    (%r[[A0:di|cx]]), {{%esi|%edx}}
-; CHECK-NEXT: movl    {{%edx|%r8d}}, %eax
-; CHECK-NEXT: cmovnsl {{%ecx|%r9d}}, %eax
-; CHECK-NEXT: ret
+;     CHECK: addl
+; CHECK-NOT: test
+;     CHECK: cmovnsl
+;     CHECK: ret
 
 define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
 	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
@@ -42,7 +42,7 @@ false:
 ; Do use the flags result of the and here, since the and has another use.
 
 ; CHECK: test3:
-;      CHECK: andl    $16, %e[[A0]]
+;      CHECK: andl    $16, %e
 ; CHECK-NEXT: jne
 
 define void @test3(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
new file mode 100644
index 0000000..73d80eb
--- /dev/null
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00"
+@sel = external global i8*
+@sel3 = external global i8*
+@sel4 = external global i8*
+@sel5 = external global i8*
+@sel6 = external global i8*
+@sel7 = external global i8*
+
+; X64: @foo
+; X64: jmp
+; WIN64: @foo
+; WIN64: callq
+define void @foo(i64 %arg) nounwind optsize ssp noredzone {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+  ret void
+}
+
+declare i32 @printf(i8*, ...) optsize noredzone
+
+; X64: @bar
+; X64: jmp
+; WIN64: @bar
+; WIN64: jmp
+define void @bar(i64 %arg) nounwind optsize ssp noredzone {
+entry:
+  tail call void @bar2(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+  ret void
+}
+
+declare void @bar2(i8*, i64) optsize noredzone
+
+; X64: @foo2
+; X64: jmp
+; WIN64: @foo2
+; WIN64: callq
+define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
+entry:
+  %tmp1 = load i8** @sel, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x2(i8*, i8*, ...) optsize noredzone
+
+; X64: @foo6
+; X64: jmp
+; WIN64: @foo6
+; WIN64: callq
+define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone
+
+; X64: @foo7
+; X64: callq
+; WIN64: @foo7
+; WIN64: callq
+define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp6 = load i8** @sel7, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
+  ret i8* %call
+}
+
+declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone
+
+; X64: @foo8
+; X64: callq
+; WIN64: @foo8
+; WIN64: callq
+define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
+entry:
+  %tmp2 = load i8** @sel3, align 8, !tbaa !0
+  %tmp3 = load i8** @sel4, align 8, !tbaa !0
+  %tmp4 = load i8** @sel5, align 8, !tbaa !0
+  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
+  ret i8* %call
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index dab5dd1..f487654 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
-; RUN: grep extractps   %t | count 1
-; RUN: grep pextrd      %t | count 1
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
+; RUN: not grep extractps   %t
+; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
-; RUN: not grep movss   %t
+; RUN: grep movss   %t | count 2
 
 define void @t1(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index b013730..2c8796b 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
-; RUN: grep movss    %t | count 3
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: grep movss    %t | count 4
 ; RUN: grep movhlps  %t | count 1
-; RUN: grep pshufd   %t | count 1
+; RUN: not grep pshufd   %t 
 ; RUN: grep unpckhpd %t | count 1
 
 define void @test1(<4 x float>* %F, float* %f) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 2ee87fe..06f38ed 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,8 +1,9 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
 
+; sse:  t1:
+; sse2: t1:
 define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
-; sse: movaps
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
@@ -10,6 +11,8 @@ define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
         ret <4 x float> %tmp1
 }
 
+; sse:  t2:
+; sse2: t2:
 define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 ; sse: shufps
 ; sse2: pshufd
@@ -18,8 +21,9 @@ define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
+; sse:  t3:
+; sse2: t3:
 define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
-; sse: movaps
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
@@ -27,7 +31,10 @@ define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
+; sse:  t4:
+; sse2: t4:
 define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
+
 ; sse: shufps
 ; sse2: pshufd
 ; sse2-NEXT: ret
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
index 39e7d71..fe7fa2f 100644
--- a/test/CodeGen/X86/vec_uint_to_fp.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=sandybridge | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
 
 ; Test that we are not lowering uinttofp to scalars
 define <4 x float> @test1(<4 x i32> %A) nounwind {
diff --git a/test/CodeGen/X86/visibility2.ll b/test/CodeGen/X86/visibility2.ll
new file mode 100644
index 0000000..72ea733
--- /dev/null
+++ b/test/CodeGen/X86/visibility2.ll
@@ -0,0 +1,18 @@
+; This test case ensures that when the visibility of a global declaration is
+; emitted, the declaration is not treated as a definition.  Test case for r132825.
+; Fixes <rdar://problem/9429892>.
+;
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+
+@foo_private_extern_str = external hidden global i8*
+
+define void @foo1() nounwind ssp {  ; the only user of @foo_private_extern_str in this test
+entry:
+  %tmp = load i8** @foo_private_extern_str, align 8  ; read the external hidden global, which is declared but not defined in this file
+  call void @foo3(i8* %tmp)  ; pass the loaded pointer to the externally declared @foo3
+  ret void
+}
+
+declare void @foo3(i8*)
+
+; CHECK-NOT: .private_extern
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index 82c8252..c91627c 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -4,15 +4,15 @@
 
 ; Both loads should happen before either store.
 
-; CHECK: movl  (%rdi), %eax
-; CHECK: movl  (%rsi), %ecx
-; CHECK: movl  %ecx, (%rdi)
-; CHECK: movl  %eax, (%rsi)
+; CHECK: movl  (%rdi), %[[R1:...]]
+; CHECK: movl  (%rsi), %[[R2:...]]
+; CHECK: movl  %[[R2]], (%rdi)
+; CHECK: movl  %[[R1]], (%rsi)
 
-; WIN64: movl  (%rcx), %eax
-; WIN64: movl  (%rdx), %esi
-; WIN64: movl  %esi, (%rcx)
-; WIN64: movl  %eax, (%rdx)
+; WIN64: movl  (%rcx), %[[R1:...]]
+; WIN64: movl  (%rdx), %[[R2:...]]
+; WIN64: movl  %[[R2]], (%rcx)
+; WIN64: movl  %[[R1]], (%rdx)
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index cbd38da..e39d007 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
+; Passing the same value in two registers creates a false interference that
+; only -join-physregs resolves. It could also be handled by a parallel copy.
+
 define i64 @foo(i64 %n, i64 %x) nounwind {
 entry:
 
@@ -40,9 +43,9 @@ entry:
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, %rax
 
-; EFI: leaq  15(%rcx), [[R1:%r..]]
+; EFI: leaq  15(%rcx), [[R1:%r.*]]
 ; EFI: andq  $-16, [[R1]]
-; EFI: movq  %rsp, [[R64:%r..]]
+; EFI: movq  %rsp, [[R64:%r.*]]
 ; EFI: subq  [[R1]], [[R64]]
 ; EFI: movq  [[R64]], %rsp
 
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 2465f23..07ccb23 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -39,7 +39,7 @@ define void @ccc(i64 %x) nounwind {
 
 ; This requires a mov and a 64-bit and.
 ; CHECK: ddd:
-; CHECK: movabsq $4294967296, %rax
+; CHECK: movabsq $4294967296, %r
 ; CHECK: andq %rax, %rdi
 
 define void @ddd(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/x86-64-extend-shift.ll b/test/CodeGen/X86/x86-64-extend-shift.ll
index 6852785..6ebaeee 100644
--- a/test/CodeGen/X86/x86-64-extend-shift.ll
+++ b/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -2,7 +2,7 @@
 ; Formerly there were two shifts.
 
 define i64 @baz(i32 %A) nounwind {
-; CHECK:  shlq  $49, %rax
+; CHECK:  shlq  $49, %r
         %tmp1 = shl i32 %A, 17
         %tmp2 = zext i32 %tmp1 to i64
         %tmp3 = shl i64 %tmp2, 32
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
new file mode 100644
index 0000000..fdf68f9
--- /dev/null
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+; Splat patterns below
+
+
+define <4 x i32> @shl4(<4 x i32> %A) nounwind {  ; two splat left shifts of the same <4 x i32> input; two consecutive pslld are expected
+entry:
+; CHECK:      shl4
+; CHECK:      pslld
+; CHECK-NEXT: pslld
+  %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>  ; every lane shifted left by 2
+  %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>  ; every lane shifted left by 1
+  %K = xor <4 x i32> %B, %C  ; the xor makes both shift results feed the return value
+  ret <4 x i32> %K
+}
+
+define <4 x i32> @shr4(<4 x i32> %A) nounwind {  ; two splat logical right shifts of the same <4 x i32> input; two consecutive psrld are expected
+entry:
+; CHECK:      shr4
+; CHECK:      psrld
+; CHECK-NEXT: psrld
+  %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>  ; every lane shifted right (zero fill) by 2
+  %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>  ; every lane shifted right (zero fill) by 1
+  %K = xor <4 x i32> %B, %C  ; the xor makes both shift results feed the return value
+  ret <4 x i32> %K
+}
+
+define <4 x i32> @sra4(<4 x i32> %A) nounwind {  ; two splat arithmetic right shifts of the same <4 x i32> input; two consecutive psrad are expected
+entry:
+; CHECK:      sra4
+; CHECK:      psrad
+; CHECK-NEXT: psrad
+  %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>  ; every lane shifted right (sign fill) by 2
+  %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>  ; every lane shifted right (sign fill) by 1
+  %K = xor <4 x i32> %B, %C  ; the xor makes both shift results feed the return value
+  ret <4 x i32> %K
+}
+
+define <2 x i64> @shl2(<2 x i64> %A) nounwind {  ; two splat left shifts of the same <2 x i64> input; two consecutive psllq are expected
+entry:
+; CHECK:      shl2
+; CHECK:      psllq
+; CHECK-NEXT: psllq
+  %B = shl <2 x i64> %A,  < i64 2, i64 2>  ; both lanes shifted left by 2
+  %C = shl <2 x i64> %A,  < i64 9, i64 9>  ; both lanes shifted left by 9
+  %K = xor <2 x i64> %B, %C  ; the xor makes both shift results feed the return value
+  ret <2 x i64> %K
+}
+
+define <2 x i64> @shr2(<2 x i64> %A) nounwind {  ; two splat logical right shifts of the same <2 x i64> input; two consecutive psrlq are expected
+entry:
+; CHECK:      shr2
+; CHECK:      psrlq
+; CHECK-NEXT: psrlq
+  %B = lshr <2 x i64> %A,  < i64 8, i64 8>  ; both lanes shifted right (zero fill) by 8
+  %C = lshr <2 x i64> %A,  < i64 1, i64 1>  ; both lanes shifted right (zero fill) by 1
+  %K = xor <2 x i64> %B, %C  ; the xor makes both shift results feed the return value
+  ret <2 x i64> %K
+}
+
+
+define <8 x i16> @shl8(<8 x i16> %A) nounwind {  ; two splat left shifts of the same <8 x i16> input; two consecutive psllw are expected
+entry:
+; CHECK:      shl8
+; CHECK:      psllw
+; CHECK-NEXT: psllw
+  %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>  ; every lane shifted left by 2
+  %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>  ; every lane shifted left by 1
+  %K = xor <8 x i16> %B, %C  ; the xor makes both shift results feed the return value
+  ret <8 x i16> %K
+}
+
+define <8 x i16> @shr8(<8 x i16> %A) nounwind {  ; two splat logical right shifts of the same <8 x i16> input; two consecutive psrlw are expected
+entry:
+; CHECK:      shr8
+; CHECK:      psrlw
+; CHECK-NEXT: psrlw
+  %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>  ; every lane shifted right (zero fill) by 2
+  %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>  ; every lane shifted right (zero fill) by 1
+  %K = xor <8 x i16> %B, %C  ; the xor makes both shift results feed the return value
+  ret <8 x i16> %K
+}
+
+define <8 x i16> @sra8(<8 x i16> %A) nounwind {  ; two splat arithmetic right shifts of the same <8 x i16> input; two consecutive psraw are expected
+entry:
+; CHECK:      sra8
+; CHECK:      psraw
+; CHECK-NEXT: psraw
+  %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>  ; every lane shifted right (sign fill) by 2
+  %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>  ; every lane shifted right (sign fill) by 1
+  %K = xor <8 x i16> %B, %C  ; the xor makes both shift results feed the return value
+  ret <8 x i16> %K
+}
+
+; non splat test
+
+
+define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {  ; per-lane shift amounts differ, so no psll* instruction may appear; NOTE(review): the second identical NOT directive looks redundant
+entry:
+; CHECK: sll8_nosplat
+; CHECK-NOT: psll
+; CHECK-NOT: psll
+  %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>  ; shift amounts are not a splat: 1, 2, 3, 6, 2, 2, 2, 2
+  %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>  ; shift amounts are not a splat: 9, 7, 5, 1, 4, 1, 1, 1
+  %K = xor <8 x i16> %B, %C  ; the xor makes both shift results feed the return value
+  ret <8 x i16> %K
+}
+
+
+define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {  ; per-lane shift amounts differ, so no psrlq may appear; NOTE(review): the second identical NOT directive looks redundant
+entry:
+; CHECK: shr2_nosplat
+; CHECK-NOT:  psrlq
+; CHECK-NOT:  psrlq
+  %B = lshr <2 x i64> %A,  < i64 8, i64 1>  ; shift amounts are not a splat: 8 and 1
+  %C = lshr <2 x i64> %A,  < i64 1, i64 0>  ; shift amounts are not a splat: 1 and 0
+  %K = xor <2 x i64> %B, %C  ; the xor makes both shift results feed the return value
+  ret <2 x i64> %K
+}
+
+
+; Other shifts
+
+define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {  ; splat left shifts on <2 x i32>; no psllq may be emitted for them
+entry:
+; CHECK: shl2_other
+; CHECK-NOT:      psllq
+  %B = shl <2 x i32> %A,  < i32 2, i32 2>  ; both lanes shifted left by 2
+  %C = shl <2 x i32> %A,  < i32 9, i32 9>  ; both lanes shifted left by 9
+  %K = xor <2 x i32> %B, %C  ; the xor makes both shift results feed the return value
+  ret <2 x i32> %K  ; NOTE(review): the NOT directive was spelled in lower case and silently ignored; upper case matches @shr2_other
+}
+
+define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {  ; splat logical right shifts on <2 x i32>; no psrlq may be emitted for them
+entry:
+; CHECK: shr2_other
+; CHECK-NOT:      psrlq
+  %B = lshr <2 x i32> %A,  < i32 8, i32 8>  ; both lanes shifted right (zero fill) by 8
+  %C = lshr <2 x i32> %A,  < i32 1, i32 1>  ; both lanes shifted right (zero fill) by 1
+  %K = xor <2 x i32> %B, %C  ; the xor makes both shift results feed the return value
+  ret <2 x i32> %K
+}
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index b90d81a..178c59d 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -29,9 +29,8 @@ entry:
         ret i32 %tmp4
         
 ; X64: test3:
-; X64:	notl	[[A1:%esi|%edx]]
-; X64:	andl	[[A0:%edi|%ecx]], [[A1]]
-; X64:	movl	[[A1]], %eax
+; X64:	notl
+; X64:	andl
 ; X64:	shrl	%eax
 ; X64:	ret
 
@@ -139,7 +138,7 @@ entry:
   %t2 = add i32 %t1, -1
   ret i32 %t2
 ; X64: test8:
-; X64:   notl %eax
+; X64:   notl {{%eax|%edi|%ecx}}
 ; X32: test8:
 ; X32:   notl %eax
 }
diff --git a/test/CodeGen/XCore/bitrev.ll b/test/CodeGen/XCore/bitrev.ll
deleted file mode 100644
index 09202d3..0000000
--- a/test/CodeGen/XCore/bitrev.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep bitrev %t1.s | count 1 
-declare i32 @llvm.xcore.bitrev(i32)
-
-define i32 @test(i32 %val) {
-	%result = call i32 @llvm.xcore.bitrev(i32 %val)
-	ret i32 %result
-}
diff --git a/test/CodeGen/XCore/misc-intrinsics.ll b/test/CodeGen/XCore/misc-intrinsics.ll
new file mode 100644
index 0000000..f504a2e
--- /dev/null
+++ b/test/CodeGen/XCore/misc-intrinsics.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+%0 = type { i32, i32 }
+
+declare i32 @llvm.xcore.bitrev(i32)
+declare i32 @llvm.xcore.crc32(i32, i32, i32)
+declare %0 @llvm.xcore.crc8(i32, i32, i32)
+
+define i32 @bitrev(i32 %val) {  ; thin wrapper around the llvm.xcore.bitrev intrinsic
+; CHECK: bitrev:
+; CHECK: bitrev r0, r0
+	%result = call i32 @llvm.xcore.bitrev(i32 %val)  ; expected to lower to a "bitrev r0, r0" instruction
+	ret i32 %result  ; the intrinsic's result is returned unchanged
+}
+
+define i32 @crc32(i32 %crc, i32 %data, i32 %poly) {  ; thin wrapper around the llvm.xcore.crc32 intrinsic
+; CHECK: crc32:
+; CHECK: crc32 r0, r1, r2
+	%result = call i32 @llvm.xcore.crc32(i32 %crc, i32 %data, i32 %poly)  ; arguments are forwarded in order; expected to lower to "crc32 r0, r1, r2"
+	ret i32 %result  ; the intrinsic's result is returned unchanged
+}
+
+define %0 @crc8(i32 %crc, i32 %data, i32 %poly) {  ; thin wrapper around llvm.xcore.crc8; returns the two-field struct %0 = { i32, i32 }
+; CHECK: crc8:
+; CHECK: crc8 r0, r1, r1, r2
+	%result = call %0 @llvm.xcore.crc8(i32 %crc, i32 %data, i32 %poly)  ; arguments are forwarded in order; expected to lower to "crc8 r0, r1, r1, r2"
+	ret %0 %result  ; both struct fields are returned unchanged
+}
diff --git a/test/CodeGen/XCore/mul64.ll b/test/CodeGen/XCore/mul64.ll
index 77c6b42..3d373b1 100644
--- a/test/CodeGen/XCore/mul64.ll
+++ b/test/CodeGen/XCore/mul64.ll
@@ -9,7 +9,7 @@ entry:
 }
 ; CHECK: umul_lohi:
 ; CHECK: ldc [[REG:r[0-9]+]], 0
-; CHECK-NEXT: lmul r1, r0, r1, r0, [[REG]], [[REG]]
+; CHECK-NEXT: lmul {{.*}}, [[REG]], [[REG]]
 ; CHECK-NEXT: retsp 0
 
 define i64 @smul_lohi(i32 %a, i32 %b) {
@@ -23,9 +23,7 @@ entry:
 ; CHECK: ldc
 ; CHECK-NEXT: mov
 ; CHECK-NEXT: maccs
-; CHECK-NEXT: mov r0,
-; CHECK-NEXT: mov r1,
-; CHECK-NEXT: retsp 0
+; CHECK: retsp 0
 
 define i64 @mul64(i64 %a, i64 %b) {
 entry:
@@ -37,7 +35,6 @@ entry:
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: lmul
-; CHECK-NEXT: mov r0,
 
 define i64 @mul64_2(i64 %a, i32 %b) {
 entry:
@@ -50,4 +47,4 @@ entry:
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: add r1,
-; CHECK-NEXT: retsp 0
+; CHECK: retsp 0
author	dim <dim@FreeBSD.org>	2011-06-12 15:42:51 +0000
committer	dim <dim@FreeBSD.org>	2011-06-12 15:42:51 +0000
commit	ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree	b3032e51d630e8070e9e08d6641648f195316a80 /test/CodeGen
parent	2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
download	FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.zip FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.tar.gz