893 files changed, 19794 insertions, 9722 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 78c6222..94c562b 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i:		; preds = %bb88.i, %bb74.i, %entry
 bb88.i:		; preds = %bb74.i
 	br i1 false, label %mandel.exit, label %bb74.i
 mandel.exit:		; preds = %bb88.i
-	%tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
+	%tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
 	%tmp23 = fptosi double %tmp2 to i32		; <i32> [#uses=1]
 	%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 )		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 8bde748..a016809 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -37,10 +37,11 @@ return:                                           ; preds = %invcont
   ret void
 
 lpad:                                             ; preds = %entry
-  %eh_ptr = call i8* @llvm.eh.exception()
+  %exn = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           cleanup
+  %eh_ptr = extractvalue {i8*, i32} %exn, 0
   store i8* %eh_ptr, i8** %eh_exception
-  %eh_ptr1 = load i8** %eh_exception
-  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+  %eh_select2 = extractvalue {i8*, i32} %exn, 1
   store i32 %eh_select2, i32* %eh_selector
   br label %ppad
 
@@ -94,10 +95,6 @@ declare void @_ZdlPv(i8*) nounwind
 
 declare void @_Z3barv()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 0a157c9..426bd17 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
   br i1 %4, label %bb1, label %bb2
 
 bb1:
-;CHECK: vstrhi.64
+;CHECK: vstrhi
   store double %1, double* %y
   br label %bb2
 
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 8bfd026..eb9c2d0 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
   %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-; CHECK: vldr.64
+; CHECK: vldr
   %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 7aae3ac..a8afc20 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
 ; PR5423
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
 
 ; THUMB:     t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r2, [r1]
+; THUMB:     str r1, [r0]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
   store i32 0, i32* inttoptr (i32 8 to i32*), align 8
   br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index e47c038..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index cd1c9c8..a400b7b 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -10,28 +10,28 @@ entry:
   %4 = ashr i32 %3, 30
   %.sum = add i32 %4, 4
   %5 = getelementptr inbounds float* %table, i32 %.sum
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %6 = load float* %5, align 4
   %tmp11 = insertelement <4 x float> undef, float %6, i32 0
   %7 = shl i32 %packedValue, 18
   %8 = ashr i32 %7, 30
   %.sum12 = add i32 %8, 4
   %9 = getelementptr inbounds float* %table, i32 %.sum12
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %10 = load float* %9, align 4
   %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
   %11 = shl i32 %packedValue, 20
   %12 = ashr i32 %11, 30
   %.sum13 = add i32 %12, 4
   %13 = getelementptr inbounds float* %table, i32 %.sum13
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %14 = load float* %13, align 4
   %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
   %15 = shl i32 %packedValue, 22
   %16 = ashr i32 %15, 30
   %.sum14 = add i32 %16, 4
   %17 = getelementptr inbounds float* %table, i32 %.sum14
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %18 = load float* %17, align 4
   %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
   %19 = fmul <4 x float> %tmp5, %2
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
 
 ; CHECK: vld1.64 {d16, d17}, [r{{.}}]
 ; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index c03c815..2842437 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -21,12 +21,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare void @_Unwind_SjLj_Resume(i8*)
@@ -75,8 +71,11 @@ try.cont:                                         ; preds = %lpad
   ret i32 %conv
 
 lpad:                                             ; preds = %entry
-  %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4]
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1]
+  %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* bitcast (%0* @_ZTI1A to i8*)
+           catch i8* null
+  %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+  %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
   %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1]
   %3 = icmp eq i32 %eh.selector, %2               ; <i1> [#uses=1]
   br i1 %3, label %try.cont, label %eh.resume
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index f57b7e6..4b47085 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -34,10 +34,12 @@ return:                                           ; preds = %entry
   ret void
 
 lpad:                                             ; preds = %bb
-  %eh_ptr = call i8* @llvm.eh.exception()         ; <i8*> [#uses=1]
-  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
   %eh_ptr13 = load i8** %eh_exception             ; <i8*> [#uses=1]
-  %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1)
+  %eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select14, i32* %eh_selector
   br label %ppad
 
@@ -54,10 +56,6 @@ declare arm_apcscc void @func2()
 
 declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
 
 declare arm_apcscc void @func3()
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 0422094..ec74880 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=trivial
 ; RUN: llc < %s -verify-machineinstrs -spiller=inline
 ; PR8612
 ;
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios   | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
 ; rdar://8690640
 
 define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..770ad44 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,39 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
 ; rdar://8728956
 
 define hidden void @foo() nounwind ssp {
 entry:
 ; CHECK: foo:
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; CHECK: mov r7, sp
 ; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
-  %tmp40 = load <4 x i8>* undef
-  %tmp41 = extractelement <4 x i8> %tmp40, i32 2
-  %conv42 = zext i8 %tmp41 to i32
-  %conv43 = sitofp i32 %conv42 to float
-  %div44 = fdiv float %conv43, 2.560000e+02
-  %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
-  %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit46, <4 x float>* undef
-  br i1 undef, label %if.then105, label %if.else109
-
-if.then105:                                       ; preds = %entry
-  br label %if.end114
-
-if.else109:                                       ; preds = %entry
-  br label %if.end114
-
-if.end114:                                        ; preds = %if.else109, %if.then105
-  %call185 = call float @bar()
-  %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
-  %call189 = call float @bar()
-  %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
-  %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit191, <4 x float>* undef
+  tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
 ; CHECK: vpop {d10, d11}
 ; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 9484212..ca88eed 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -3,11 +3,11 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
-@x1 = internal global i8 1
-@x2 = internal global i8 1
-@x3 = internal global i8 1
-@x4 = internal global i8 1
-@x5 = global i8 1
+@x1 = internal global i8 1, align 1
+@x2 = internal global i8 1, align 1
+@x3 = internal global i8 1, align 1
+@x4 = internal global i8 1, align 1
+@x5 = global i8 1, align 1
 
 ; Check debug info output for merged global.
 ; DW_AT_location
@@ -17,8 +17,7 @@ target triple = "thumbv7-apple-darwin10"
 ; DW_OP_constu
 ; offset
 
-;CHECK:        .ascii   "x2"                   @ DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset6
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281e9..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
 
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index 0b5f962..d3394b5 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -35,14 +35,14 @@ for.cond.backedge:
   br label %for.cond
 
 lpad:
-  %exn = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
 lpad26:
-  %exn27 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
@@ -57,31 +57,26 @@ call8.i.i.i.noexc:
   ret void
 
 lpad44:
-  %exn45 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn45 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
 eh.resume:
-  %exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
-  tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
-  unreachable
+  %exn.slot.0 = phi { i8*, i32 } [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+  resume { i8*, i32 } %exn.slot.0
 
 terminate.lpad:
-  %exn51 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn51 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   tail call void @_ZSt9terminatev() noreturn nounwind
   unreachable
 }
 
 declare void @foo()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
 
 declare void @_ZSt9terminatev()
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 1b5b8a9..091d037 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,12 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
 ; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
 
-%struct.config = type { i16, i16, i16, i16 }
-
 @prev = external global [0 x i16]
 @max_lazy_match = internal unnamed_addr global i32 0, align 4
 @read_buf = external global i32 (i8*, i32)*
 @window = external global [0 x i8]
 @lookahead = internal unnamed_addr global i32 0, align 4
-@eofile.b = internal unnamed_addr global i1 false
+@eofile.b = internal unnamed_addr global i32 0
 @ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index f681c34..f2b0c5d 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,8 +8,7 @@
 ; DW_OP_constu
 ; offset
 
-;CHECK:        .ascii   "x2"                   @ DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset33
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; Test that ldmia_ret preserves implicit operands for return values.
 ;
 ; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
new file mode 100644
index 0000000..09db740
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -mattr=+neon,+neonfp -relocation-model=pic
+
+target triple = "armv6-none-linux-gnueabi"
+
+define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
+L.entry:
+  %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+  %1 = bitcast i16* %destValues to i8*
+  %2 = mul i32 %0, 6
+  %3 = getelementptr i8* %1, i32 %2
+  %4 = bitcast i8* %3 to <3 x i16>*
+  %5 = load <3 x i16>* %4, align 1
+  %6 = bitcast i16* %sourceA to i8*
+  %7 = getelementptr i8* %6, i32 %2
+  %8 = bitcast i8* %7 to <3 x i16>*
+  %9 = load <3 x i16>* %8, align 1
+  %10 = or <3 x i16> %9, %5
+  store <3 x i16> %10, <3 x i16>* %4, align 1
+  ret void
+}
+
+declare i32 @get_index(...)
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
new file mode 100644
index 0000000..ff049c8
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -0,0 +1,21 @@
+; Make sure short memsets on ARM lower to stores, even when optimizing for size.
+; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+; CHECK-GENERIC:      strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-UNALIGNED:      strb
+; CHECK-UNALIGNED-NEXT: str 
+define void @foo(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 0000000..42b1491
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK:      vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
new file mode 100644
index 0000000..113cbfe
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@i8_res  = global <2 x i8> <i8 0, i8 0>
+@i8_src1 = global <2 x i8> <i8 1, i8 2>
+@i8_src2 = global <2 x i8> <i8 2, i8 1>
+
+define void @test_neon_vector_add_2xi8() nounwind {
+; CHECK: test_neon_vector_add_2xi8:
+  %1 = load <2 x i8>* @i8_src1
+  %2 = load <2 x i8>* @i8_src2
+  %3 = add <2 x i8> %1, %2
+  store <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
+
+define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
+; CHECK: test_neon_ld_st_volatile_with_ashr_2xi8:
+  %1 = load volatile <2 x i8>* @i8_src1
+  %2 = load volatile <2 x i8>* @i8_src2
+  %3 = ashr <2 x i8> %1, %2
+  store volatile <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
new file mode 100644
index 0000000..2ab6a4f
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@src1_v2i16 = global <2 x i16> <i16 0, i16 1>
+@res_v2i16  = global <2 x i16> <i16 0, i16 0>
+
+declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
+
+define void @test_neon_call_return_v2i16() {
+; CHECK: test_neon_call_return_v2i16:
+  %1 = load <2 x i16>* @src1_v2i16
+  %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
+  store <2 x i16> %2, <2 x i16>* @res_v2i16
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
new file mode 100644
index 0000000..719571b
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @test1(<2 x double>* %A) {
+; CHECK: test1
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+  %tmp1 = load <2 x double>* %A
+	%tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
+	ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @test2(<2 x double>* %A) {
+; CHECK: test2
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
+  %tmp1 = load <2 x double>* %A
+	%tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
+	ret <2 x i32> %tmp2
+}
+
+define <2 x double> @test3(<2 x i32>* %A) {
+; CHECK: test3
+; CHECK: vcvt.f64.s32
+; CHECK: vcvt.f64.s32
+  %tmp1 = load <2 x i32>* %A
+	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
+	ret <2 x double> %tmp2
+}
+
+define <2 x double> @test4(<2 x i32>* %A) {
+; CHECK: test4
+; CHECK: vcvt.f64.u32
+; CHECK: vcvt.f64.u32
+  %tmp1 = load <2 x i32>* %A
+	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
+	ret <2 x double> %tmp2
+}
diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
new file mode 100644
index 0000000..52aa0bf
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-regalloc
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This test calls shrinkToUses with an early-clobber redefined live range during
+; spilling.
+;
+;   Shrink: %vreg47,1.158257e-02 = [384r,400e:0)[400e,420r:1)  0@384r 1@400e
+;
+; The early-clobber instruction is an str:
+;
+;   %vreg12<earlyclobber,def> = t2STR_PRE %vreg6, %vreg12, 32, pred:14, pred:%noreg
+;
+; This tests that shrinkToUses handles the EC redef correctly.
+
+%struct.Transform_Struct.0.11.12.17.43.46.56.58.60 = type { [4 x [4 x double]] }
+
+define void @Compute_Axis_Rotation_Transform(%struct.Transform_Struct.0.11.12.17.43.46.56.58.60* nocapture %transform, double* nocapture %V1, double %angle) nounwind {
+entry:
+  store double 1.000000e+00, double* null, align 4
+  %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
+  store double 0.000000e+00, double* %arrayidx5.1.i, align 4
+  %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
+  store double 0.000000e+00, double* %arrayidx5.2.i, align 4
+  %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
+  store double 0.000000e+00, double* %arrayidx5.114.i, align 4
+  %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
+  store double 1.000000e+00, double* %arrayidx5.1.1.i, align 4
+  store double 0.000000e+00, double* null, align 4
+  store double 1.000000e+00, double* null, align 4
+  store double 0.000000e+00, double* null, align 4
+  %call = tail call double @cos(double %angle) nounwind readnone
+  %call1 = tail call double @sin(double %angle) nounwind readnone
+  %0 = load double* %V1, align 4
+  %arrayidx2 = getelementptr inbounds double* %V1, i32 1
+  %1 = load double* %arrayidx2, align 4
+  %mul = fmul double %0, %1
+  %sub = fsub double 1.000000e+00, %call
+  %mul3 = fmul double %mul, %sub
+  %2 = load double* undef, align 4
+  %mul5 = fmul double %2, %call1
+  %add = fadd double %mul3, %mul5
+  store double %add, double* %arrayidx5.1.i, align 4
+  %3 = load double* %V1, align 4
+  %mul11 = fmul double %3, undef
+  %mul13 = fmul double %mul11, %sub
+  %4 = load double* %arrayidx2, align 4
+  %mul15 = fmul double %4, %call1
+  %sub16 = fsub double %mul13, %mul15
+  store double %sub16, double* %arrayidx5.2.i, align 4
+  %5 = load double* %V1, align 4
+  %6 = load double* %arrayidx2, align 4
+  %mul22 = fmul double %5, %6
+  %mul24 = fmul double %mul22, %sub
+  %sub27 = fsub double %mul24, undef
+  store double %sub27, double* %arrayidx5.114.i, align 4
+  ret void
+}
+
+declare double @cos(double) nounwind readnone
+
+declare double @sin(double) nounwind readnone
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
new file mode 100644
index 0000000..5409f8c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0.0 | FileCheck %s
+; rdar://10464621
+
+; DAG combine increases loads from packed types. ARM load / store optimizer then
+; combined them into a ldm which causes runtime exception.
+
+%struct.InformationBlock = type <{ i32, %struct.FlagBits, %struct.FlagBits }>
+%struct.FlagBits = type <{ [4 x i32] }>
+
+@infoBlock = external global %struct.InformationBlock
+
+define hidden void @foo() {
+; CHECK: foo:
+; CHECK: ldr.w
+; CHECK: ldr.w
+; CHECK-NOT: ldm
+entry:
+  %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+  %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+  %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+  %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+  %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+  %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+  %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+  %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+  %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
+  %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
+  %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
+  %insert27 = insertvalue [4 x i32] %insert25, i32 %tmp19, 3
+  %insert = insertvalue [4 x i32] undef, i32 %tmp, 0
+  %insert7 = insertvalue [4 x i32] %insert, i32 %tmp3, 1
+  %insert9 = insertvalue [4 x i32] %insert7, i32 %tmp4, 2
+  %insert11 = insertvalue [4 x i32] %insert9, i32 %tmp5, 3
+  tail call void @bar([4 x i32] %insert27, [4 x i32] %insert11)
+  ret void
+}
+
+declare void @bar([4 x i32], [4 x i32])
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
new file mode 100644
index 0000000..6fbae19
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK:      movw    r1, :lower16:{{.*}}
+; CHECK:      movt    r1, :upper16:{{.*}}
+; CHECK:      vldmia  r1
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK:       movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:       movt  [[reg0]], :upper16:{{.*}}
+; CHECK:       vldmia  [[reg0]], {{.*}}
+; CHECK:       vmul.f32 {{.*}}
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
new file mode 100644
index 0000000..0c90f4c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+; <rdar://problem/10497732>
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i32 1
+@x2 = internal global i64 12
+
+define i64 @f() {
+  %ax = load i32* @x1
+  %a = zext i32 %ax to i64
+  %b = load i64* @x2
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; We can global-merge the i64 in theory, but the current code doesn't handle
+; the alignment correctly; for the moment, just check that we don't do it.
+; See also 
+
+; CHECK-NOT: MergedGlobals
+; CHECK: _x2
+; CHECK-NOT: MergedGlobals
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
new file mode 100644
index 0000000..5ce600d
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; Radar 10266272
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios4.0.0"
+; STATS-NOT: machine-sink
+
+define i32 @foo(i32 %h) nounwind readonly ssp {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %cmp = icmp slt i32 0, %h
+  br i1 %cmp, label %for.body, label %if.end299
+
+for.body:                                         ; preds = %for.cond
+  %v.5 = select i1 undef, i32 undef, i32 0
+  %0 = load i8* undef, align 1, !tbaa !0
+  %conv88 = zext i8 %0 to i32
+  %sub89 = sub nsw i32 0, %conv88
+  %v.8 = select i1 undef, i32 undef, i32 %sub89
+  %1 = load i8* null, align 1, !tbaa !0
+  %conv108 = zext i8 %1 to i32
+  %2 = load i8* undef, align 1, !tbaa !0
+  %conv110 = zext i8 %2 to i32
+  %sub111 = sub nsw i32 %conv108, %conv110
+  %cmp112 = icmp slt i32 %sub111, 0
+  %sub115 = sub nsw i32 0, %sub111
+  %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
+  %add62 = add i32 0, %v.5
+  %add73 = add i32 %add62, 0
+  %add84 = add i32 %add73, 0
+  %add95 = add i32 %add84, %v.8
+  %add106 = add i32 %add95, 0
+  %add117 = add i32 %add106, %v.10
+  %add128 = add i32 %add117, 0
+  %add139 = add i32 %add128, 0
+  %add150 = add i32 %add139, 0
+  %add161 = add i32 %add150, 0
+  %add172 = add i32 %add161, 0
+  br i1 undef, label %for.cond, label %if.end299
+
+if.end299:                                        ; preds = %for.body, %for.cond
+  %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+  ret i32 %s.10
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH.  In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i8*, align 4
+  %tmp3 = alloca i1
+  %myException = alloca %0*, align 4
+  %tmp4 = alloca i8*
+  %tmp5 = alloca i32
+  %exception = alloca %0*, align 4
+  store i32 %a, i32* %tmp, align 4
+  store i32 %b, i32* %tmp1, align 4
+  store i8* %d, i8** %tmp2, align 4
+  store i1 false, i1* %tmp3
+  %tmp7 = load i8** %c
+  %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+          to label %bb11 unwind label %bb15
+
+bb11:                                             ; preds = %bb
+  store %0* %tmp10, %0** %myException, align 4
+  %tmp12 = load %0** %myException, align 4
+  %tmp13 = bitcast %0* %tmp12 to i8*
+  invoke void @objc_exception_throw(i8* %tmp13) noreturn
+          to label %bb14 unwind label %bb15
+
+bb14:                                             ; preds = %bb11
+  unreachable
+
+bb15:                                             ; preds = %bb11, %bb
+  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+  store i8* %tmp17, i8** %tmp4
+  %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+  store i32 %tmp18, i32* %tmp5
+  store i1 true, i1* %tmp3
+  br label %bb56
+
+bb56:
+  unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb4, %bb2
+  %tmp = icmp slt i32 undef, undef
+  br i1 %tmp, label %bb4, label %bb67
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+  %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp10 = fmul <4 x float> undef, %tmp9
+  %tmp11 = fadd <4 x float> undef, %tmp10
+  %tmp12 = bitcast <4 x float> zeroinitializer to i128
+  %tmp13 = lshr i128 %tmp12, 64
+  %tmp14 = trunc i128 %tmp13 to i64
+  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+  %tmp18 = fmul <4 x float> %tmp17, %tmp16
+  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+  %tmp20 = fmul <4 x float> %tmp19, %tmp18
+  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+  call arm_aapcs_vfpcc  void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+  %tmp23 = bitcast <4 x float> %tmp22 to i128
+  %tmp24 = trunc i128 %tmp23 to i64
+  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+  %tmp35 = fmul <4 x float> %tmp34, undef
+  %tmp36 = fmul <4 x float> %tmp35, undef
+  %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+  %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp44 = fmul <4 x float> %tmp33, %tmp43
+  %tmp45 = fadd <4 x float> %tmp42, %tmp44
+  %tmp46 = fsub <4 x float> %tmp45, undef
+  %tmp47 = fmul <4 x float> %tmp46, %tmp36
+  %tmp48 = fadd <4 x float> undef, %tmp47
+  %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+  %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+  %tmp58 = fmul <4 x float> undef, %tmp57
+  %tmp59 = fsub <4 x float> %tmp51, %tmp48
+  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+  %tmp61 = fmul <4 x float> %tmp59, %tmp60
+  %tmp62 = fadd <4 x float> %tmp48, %tmp61
+  call arm_aapcs_vfpcc  void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+  %tmp63 = bitcast <4 x float> %tmp62 to i128
+  %tmp64 = lshr i128 %tmp63, 64
+  %tmp65 = trunc i128 %tmp64 to i64
+  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+  call arm_aapcs_vfpcc  void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+  br label %bb3
+
+bb67:                                             ; preds = %bb3
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp2 = extractelement <2 x float> %tmp, i32 0
+  %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+  %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+  %tmp7 = extractelement <2 x float> %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+  %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+  %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+  %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+  %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+  %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+  %tmp18 = extractelement <2 x float> %tmp17, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+  %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+  %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+  %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+  %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+  %tmp26 = extractelement <2 x float> %tmp25, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+  ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+  br i1 undef, label %for.end, label %cond.end295
+
+cond.end295:                                      ; preds = %entry
+  %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+  %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+  %1 = bitcast <4 x float> undef to <2 x i64>
+  %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+  %3 = bitcast <4 x float> %2 to <2 x i64>
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+  %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+  %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+  %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+  %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+  %9 = bitcast <2 x float> %7 to <1 x i64>
+  %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+  %11 = bitcast <2 x i64> %10 to <4 x float>
+  br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br i1 undef, label %bb92, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = or <4 x i32> undef, undef
+  %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+  %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+  %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+  %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+  %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+  %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+  %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+  %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+  %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+  %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+  %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+  %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+  %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+  %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+  %tmp22 = or <2 x i32> %tmp19, %tmp21
+  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+  %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+  %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+  %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+  %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+  %tmp30 = or <2 x i32> %tmp28, %tmp29
+  %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+  %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+  %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp34 = fadd <4 x float> %tmp33, %tmp32
+  %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+  %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+  %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+  %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp43 = fmul <4 x float> %tmp42, %tmp41
+  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp45 = fadd <4 x float> undef, %tmp43
+  %tmp46 = fadd <4 x float> undef, %tmp45
+  %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+  %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+  %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp51 = fmul <4 x float> %tmp42, %tmp50
+  %tmp52 = fmul <4 x float> %tmp44, undef
+  %tmp53 = fadd <4 x float> %tmp52, %tmp51
+  %tmp54 = fadd <4 x float> undef, %tmp53
+  %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+  %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+  %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp59 = fmul <4 x float> undef, %tmp58
+  %tmp60 = fadd <4 x float> %tmp59, undef
+  %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+  %tmp62 = load void (i8*, i8*)** undef, align 4
+  call arm_aapcs_vfpcc  void %tmp62(i8* sret undef, i8* undef) nounwind
+  %tmp63 = bitcast <4 x float> %tmp46 to i128
+  %tmp64 = bitcast <4 x float> %tmp54 to i128
+  %tmp65 = bitcast <4 x float> %tmp61 to i128
+  %tmp66 = lshr i128 %tmp63, 64
+  %tmp67 = trunc i128 %tmp66 to i64
+  %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+  %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+  %tmp70 = lshr i128 %tmp64, 64
+  %tmp71 = trunc i128 %tmp70 to i64
+  %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+  %tmp73 = trunc i128 %tmp65 to i64
+  %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+  %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+  %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+  %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+  call arm_aapcs_vfpcc  void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
+  %tmp78 = call arm_aapcs_vfpcc  i8* null(i8* null) nounwind
+  %tmp79 = bitcast i8* %tmp78 to i512*
+  %tmp80 = load i512* %tmp79, align 16
+  %tmp81 = lshr i512 %tmp80, 128
+  %tmp82 = trunc i512 %tmp80 to i128
+  %tmp83 = trunc i512 %tmp81 to i128
+  %tmp84 = bitcast i128 %tmp83 to <4 x float>
+  %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+  %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+  %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp89 = fmul <4 x float> undef, %tmp88
+  %tmp90 = fadd <4 x float> %tmp89, undef
+  %tmp91 = fadd <4 x float> undef, %tmp90
+  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+bb92:                                             ; preds = %bb2
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 0000000..6c7aaad
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
+  %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = extractelement <4 x float> %4, i32 1
+  %7 = extractelement <4 x float> %4, i32 2
+  %8 = insertelement <4 x float> undef, float %5, i32 0
+  %9 = insertelement <4 x float> %8, float %6, i32 1
+  %10 = insertelement <4 x float> %9, float %7, i32 2
+  %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
+  store <4 x float> %11, <4 x float>* undef, align 16 
+  call arm_aapcs_vfpcc  void @baz(%1* undef, float 0.000000e+00) nounwind
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
new file mode 100644
index 0000000..c9ea691
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 -verify-machineinstrs < %s
+; PR12165
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "arm-none-linux"
+
+define hidden void @_strtod_r() nounwind {
+  br i1 undef, label %1, label %2
+
+; <label>:1                                       ; preds = %0
+  br label %2
+
+; <label>:2                                       ; preds = %1, %0
+  br i1 undef, label %3, label %8
+
+; <label>:3                                       ; preds = %2
+  br i1 undef, label %4, label %7
+
+; <label>:4                                       ; preds = %3
+  %5 = call i32 @llvm.flt.rounds()
+  %6 = icmp eq i32 %5, 1
+  br i1 %6, label %8, label %7
+
+; <label>:7                                       ; preds = %4, %3
+  unreachable
+
+; <label>:8                                       ; preds = %4, %2
+  br i1 undef, label %9, label %10
+
+; <label>:9                                       ; preds = %8
+  br label %10
+
+; <label>:10                                      ; preds = %9, %8
+  ret void
+}
+
+declare i32 @llvm.flt.rounds() nounwind
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
new file mode 100644
index 0000000..6206cd7
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimize (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead. i.e.
+; (i32 extload $addr+c*sizeof(i16)
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: vst1.32
+  %0 = load <3 x i16> * %srcA, align 8
+  %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+  store <2 x i16> %1, <2 x i16> * %dst, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 0000000..0ff4f51
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to 
+; fold the immediate by breaking it into two parts and fold them into the
+; immmediate fields of two uses. e.g
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        add     r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        add.w   r0, r0, #30146560
+;
+; However, this transformation is incorrect if the user produces a flag. e.g.
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        adds    r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        adds.w  r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
+;
+; rdar://11116189
+define i64 @t(i64 %aInput) nounwind {
+; CHECK: t:
+; CHECK: movs [[REG:(r[0-9]+)]], #0
+; CHECK: movt [[REG]], #46540
+; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+  %1 = mul i64 %aInput, 1000000
+  %2 = add i64 %1, -7952618389194932224
+  ret i64 %2
+}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
new file mode 100644
index 0000000..33ad187
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind align 2 {
+  br i1 undef, label %5, label %1
+
+; <label>:1                                       ; preds = %0
+  %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %2 to <4 x float>
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+  %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
+  store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+; <label>:5                                       ; preds = %0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
new file mode 100644
index 0000000..6f50f27
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+;target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
+bb4:
+  %tmp = extractelement <2 x float> undef, i32 0
+  br i1 undef, label %bb18, label %bb5
+
+bb5:                                              ; preds = %bb4
+  %tmp6 = fadd float %tmp, -1.500000e+01
+  %tmp7 = fdiv float %tmp6, 2.000000e+01
+  %tmp8 = fadd float %tmp7, 1.000000e+00
+  %tmp9 = fdiv float 1.000000e+00, %tmp8
+  %tmp10 = fsub float 1.000000e+00, %tmp9
+  %tmp11 = fmul float %tmp10, 1.000000e+01
+  %tmp12 = fadd float %tmp11, 1.500000e+01
+  %tmp13 = fdiv float %tmp12, %tmp
+  %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
+  %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
+  %tmp17 = fadd <4 x float> %tmp16, %arg
+  store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+  br label %bb18
+
+bb18:                                             ; preds = %bb5, %bb4
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad..1272e8e 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a1..8967730 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -61,7 +61,7 @@ entry:
   ; CHECK: strex
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
-	%neg = sub i32 0, 1		; <i32> [#uses=1]
+	%neg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -77,5 +77,85 @@ entry:
   ; CHECK: strex
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
-	ret void
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+	store i32 %11, i32* %old
+	%uneg = sub i32 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+	store i32 %12, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+	store i32 %13, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+	store i32 %14, i32* %old
+
+  ret void
+}
+
+define void @func2() nounwind {
+entry:
+  %val = alloca i16
+  %old = alloca i16
+  store i16 31, i16* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i16* %val, i16 16 monotonic
+  store i16 %0, i16* %old
+  %uneg = sub i16 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+  store i16 %1, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i16* %val, i16 1 monotonic
+  store i16 %2, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i16* %val, i16 0 monotonic
+  store i16 %3, i16* %old
+  ret void
+}
+
+define void @func3() nounwind {
+entry:
+  %val = alloca i8
+  %old = alloca i8
+  store i8 31, i8* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i8* %val, i8 16 monotonic
+  store i8 %0, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %uneg = sub i8 0, 1
+  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+  store i8 %1, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i8* %val, i8 1 monotonic
+  store i8 %2, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i8* %val, i8 0 monotonic
+  store i8 %3, i8* %old
+  ret void
 }
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 92aff70..1b385ab 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -3,14 +3,48 @@
 ; dependency) when it isn't dependent on last CPSR defining instruction.
 ; rdar://8928208
 
-define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
-; CHECK: t:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: t1:
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
   %0 = mul nsw i32 %a, %b
   %1 = mul nsw i32 %c, %d
   %2 = mul nsw i32 %0, %1
   ret i32 %2
 }
+
+; Avoid partial CPSR dependency via loop backedge.
+; rdar://10357570
+define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
+entry:
+; CHECK: t2:
+  %tobool7 = icmp eq i32* %ptr2, null
+  br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK-NOT: muls
+  %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+  %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+  %0 = load i32* %ptr1.addr.09, align 4
+  %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+  %3 = load i32* %arrayidx4, align 4
+  %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+  %mul = mul i32 %1, %0
+  %mul5 = mul i32 %mul, %2
+  %mul6 = mul i32 %mul5, %3
+  store i32 %mul6, i32* %ptr2.addr.08, align 4
+  %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+  %tobool = icmp eq i32* %incdec.ptr, null
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d998..94edff5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 ; Enable tailcall optimization for iOS 5.0
@@ -96,3 +96,70 @@ bb:
   tail call void @foo() nounwind
   ret void
 }
+
+; Make sure codegenprep is duplicating ret instructions to enable tail calls.
+; rdar://11140249
+define i32 @t8(i32 %x) nounwind ssp {
+entry:
+; CHECKT2D: t8:
+; CHECKT2D-NOT: push
+; CHECKT2D-NOT
+  %and = and i32 %x, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+; CHECKT2D: bne.w _a
+  %call = tail call i32 @a(i32 %x) nounwind
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %and1 = and i32 %x, 2
+  %tobool2 = icmp eq i32 %and1, 0
+  br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3:                                         ; preds = %if.end
+; CHECKT2D: bne.w _b
+  %call4 = tail call i32 @b(i32 %x) nounwind
+  br label %return
+
+if.end5:                                          ; preds = %if.end
+; CHECKT2D: b.w _c
+  %call6 = tail call i32 @c(i32 %x) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end5, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ]
+  ret i32 %retval.0
+}
+
+declare i32 @a(i32)
+
+declare i32 @b(i32)
+
+declare i32 @c(i32)
+
+; PR12419
+; rdar://11195178
+; Use the correct input chain for the tailcall node or else the call to
+; _ZN9MutexLockD1Ev would be lost.
+%class.MutexLock = type { i8 }
+
+@x = external global i32, align 4
+
+define i32 @t9() nounwind {
+; CHECKT2D: t9:
+; CHECKT2D: blx __ZN9MutexLockC1Ev
+; CHECKT2D: blx __ZN9MutexLockD1Ev
+; CHECKT2D: b.w ___divsi3
+  %lock = alloca %class.MutexLock, align 1
+  %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
+  %2 = load i32* @x, align 4
+  %3 = sdiv i32 1000, %2
+  %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
+  ret i32 %3
+}
+
+declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 0f9543f..107e79a 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -26,7 +26,7 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
 ; CHECKV4: bx r{{.*}}
 BB0:
   %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]
-  %t35 = volatile load i32* %5                    ; <i32> [#uses=1]
+  %t35 = load volatile i32* %5                    ; <i32> [#uses=1]
   %6 = inttoptr i32 %t35 to i32**                 ; <i32**> [#uses=1]
   %7 = getelementptr i32** %6, i32 86             ; <i32**> [#uses=1]
   %8 = load i32** %7                              ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index e381e00..5b6a584 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
 
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 
 define i32 @test(i32 %x) {
 ; CHECK: test
 ; CHECK: clz r0, r0
-        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
         ret i32 %tmp.1
 }
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 0000000..7316452
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; This is really an LSR test: Make sure that cmp is using the incremented
+; induction variable.
+; CHECK: %if.end8
+; CHECK: add{{(s|\.w)?}} [[IV:r[0-9]+]], {{.*}}#1
+; CHECK: cmp [[IV]], #
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %if.end8
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+  %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+  %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul i32 %0, %0
+  %sub = add nsw i32 %i.012, -5
+  %cmp2 = icmp eq i32 %sub, %Pref
+  br i1 %cmp2, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %cmp3 = icmp ult i32 %mul, %BestCost.011
+  %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+  %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+  br label %if.end8
+
+if.else:                                          ; preds = %for.body
+  %cmp5 = icmp ugt i32 %mul, %BestCost.011
+  %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+  %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then
+  %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+  %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %inc = add i32 %i.012, 1
+  %cmp = icmp eq i32 %inc, 11
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end8
+  ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 0000000..eff5de5
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp {
+entry:
+  br i1 undef, label %while.end42, label %while.body37
+
+while.body37:                                     ; preds = %while.body37, %entry
+  br i1 false, label %while.end42, label %while.body37
+
+while.end42:                                      ; preds = %while.body37, %entry
+  %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+  %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+  tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
+  %call47 = tail call i32 @strlen(i8* %.) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9dd..1d011be 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
 
 ; Without CSE of libcalls, there are two calls in the output instead of one.
 
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
 entry:
 	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
 	%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00		; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i:		; preds = %bb28.i
 	br i1 false, label %bb.nph53.i, label %bb508.i
 
 bb508.i:		; preds = %bb502.loopexit.i, %entry
-	ret i32 1
+	ret double %tmp10.i4
 }
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
new file mode 100644
index 0000000..6419292
--- /dev/null
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu     | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN:      .section	__DATA,__mod_init_func,mod_init_funcs
+; DARWIN:      .long _f151
+; DARWIN-NEXT: .long _f152
+
+; ELF:      .section .ctors.65384,"aw",%progbits
+; ELF:      .long    f151
+; ELF:      .section .ctors.65383,"aw",%progbits
+; ELF:      .long    f152
+
+; GNUEABI:      .section .init_array.151,"aw",%init_array
+; GNUEABI:      .long    f151
+; GNUEABI:      .section .init_array.152,"aw",%init_array
+; GNUEABI:      .long    f152
+
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
+
+define void @f151() {
+entry:
+        ret void
+}
+
+define void @f152() {
+entry:
+        ret void
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 1d2ced3..5ebca53 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @f1(i32 %a) {
 ; CHECK: f1:
 ; CHECK: rbit
 ; CHECK: clz
-  %tmp = call i32 @llvm.cttz.i32( i32 %a )
+  %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true )
   ret i32 %tmp
 }
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
new file mode 100644
index 0000000..18f57ea
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  ; CHECK-NOT: vand
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+define float @g(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  %1 = load <4 x i16>* %in
+  ; CHECK-NOT: uxth
+  %2 = extractelement <4 x i16> %1, i32 0
+  ; CHECK: vcvt.f32.u32
+  %3 = uitofp i16 %2 to float
+  ret float %3
+}
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9..a7b44e6 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
 ; Test to check argument y's debug info uses FI
 ; Radar 10048772
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.tag_s = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 2c59316..0ad0a15 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -O0 < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: mydata <- [sp+#4]+#0
+; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
 ; Radar 9331779
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %0 = type opaque
 %1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 8c9095e..325eea0 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc < %s | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for d16
 ;CHECK: DW_OP_regx
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index e83a83d..97c9c66 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -3,13 +3,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: DW_OP_regx for Q register: D1
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece 8
 ;CHECK-NEXT: byte   8
 ;CHECK-NEXT: DW_OP_regx for Q register: D2
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece 8
 ;CHECK-NEXT: byte   8
 
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 548c9bd..db41143 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -2,8 +2,7 @@
 ; Radar 9309221
 ; Test dwarf reg no for s16
 ;CHECK: DW_OP_regx for S register
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_bit_piece 32 0
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ee777ce..ae7af0a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp0
 ;CHECK-NEXT:        .long   Ltmp1
-;CHECK-NEXT:        .long   Ltmp2
-;CHECK-NEXT: Lset8 = Ltmp10-Ltmp9                    @ Loc expr size
-;CHECK-NEXT:        .short  Lset8
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
+;CHECK-NEXT:        .short  Lset[[N]]
+;CHECK-NEXT: Ltmp[[M]]:
 ;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
 
 define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
index e475508..d1252f4 100644
--- a/test/CodeGen/ARM/eh-resume-darwin.ll
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -3,12 +3,6 @@ target triple = "armv6-apple-macosx10.6"
 
 declare void @func()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
-declare void @llvm.eh.resume(i8*, i32)
-
 declare i32 @__gxx_personality_sj0(...)
 
 define void @test0() {
@@ -20,10 +14,9 @@ cont:
   ret void
 
 lpad:
-  %exn = call i8* @llvm.eh.exception()
-  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
-  call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn
-  unreachable
+  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           cleanup
+  resume { i8*, i32 } %exn
 }
 
 ; CHECK: __Unwind_SjLj_Resume
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 0000000..fd7d0e6
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind {
+entry:
+  ret void
+}
+
+define void @_Z1gv() nounwind {
+entry:
+  call void @_Z1fv()
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
new file mode 100644
index 0000000..dbb634d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
+%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
+
+@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
+@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
+@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
+
+define i32* @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #124
+; THUMB: adds r0, #124
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+; ARM: movw r1, #1148
+; ARM: add r0, r0, r1
+; THUMB: addw r0, r0, #1148
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #140
+; THUMB: adds r0, #140
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+; ARM-NOT: movw r{{[0-9]}}, #1060
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM: movw r{{[0-9]}}, #1284
+; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 0000000..723383e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+  %a.addr = alloca i1, align 4
+  %0 = add i1 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+  %a.addr = alloca i8, align 4
+  %0 = add i8 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+  %a.addr = alloca i16, align 4
+  %0 = add i16 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+  %a.addr = alloca i1, align 4
+  %0 = or i1 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+  %a.addr = alloca i8, align 4
+  %0 = or i8 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+  %a.addr = alloca i16, align 4
+  %0 = or i16 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+  %a.addr = alloca i1, align 4
+  %0 = sub i1 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+  %a.addr = alloca i8, align 4
+  %0 = sub i8 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
new file mode 100644
index 0000000..7c532d5
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+entry:
+; THUMB: t1:
+; ARM: t1:
+  %x = add i32 %a, %b  
+  br i1 1, label %if.then, label %if.else
+; THUMB-NOT: b LBB0_1
+; ARM-NOT:  b LBB0_1
+
+if.then:                                          ; preds = %entry
+  call void @foo1()
+  br label %if.end7
+
+if.else:                                          ; preds = %entry
+  br i1 0, label %if.then2, label %if.else3
+; THUMB: b LBB0_4
+; ARM:  b LBB0_4
+
+if.then2:                                         ; preds = %if.else
+  call void @foo2()
+  br label %if.end6
+
+if.else3:                                         ; preds = %if.else
+  %y = sub i32 %a, %b
+  br i1 1, label %if.then5, label %if.end
+; THUMB-NOT: b LBB0_5
+; ARM-NOT:  b LBB0_5
+
+if.then5:                                         ; preds = %if.else3
+  call void @foo1()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then5, %if.else3
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.end, %if.then2
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end6, %if.then
+  ret i32 0
+}
+
+declare void @foo1()
+
+declare void @foo2()
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 0000000..a0aba69
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+  br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8:                                       ; preds = %cond.end
+  br label %cond.end8
+
+cond.false8:                                      ; preds = %cond.end
+  br label %cond.end8
+
+cond.end8:                                        ; preds = %cond.false8, %cond.true8
+  %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+  call void @fooi8(i8 %cond8)
+  br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16:                                       ; preds = %cond.end8
+  br label %cond.end16
+
+cond.false16:                                      ; preds = %cond.end8
+  br label %cond.end16
+
+cond.end16:                                        ; preds = %cond.false16, %cond.true16
+  %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+  call void @fooi16(i16 %cond16)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
new file mode 100644
index 0000000..dd460b2
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t0(i1 zeroext %a) nounwind {
+  %1 = zext i1 %a to i32
+  ret i32 %1
+}
+
+define i32 @t1(i8 signext %a) nounwind {
+  %1 = sext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t2(i8 zeroext %a) nounwind {
+  %1 = zext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t3(i16 signext %a) nounwind {
+  %1 = sext i16 %a to i32
+  ret i32 %1
+}
+
+define i32 @t4(i16 zeroext %a) nounwind {
+  %1 = zext i16 %a to i32
+  ret i32 %1
+}
+
+define void @foo(i8 %a, i16 %b) nounwind {
+; ARM: foo
+; THUMB: foo
+;; Materialize i1 1
+; ARM: movw r2, #1
+;; zero-ext
+; ARM: and r2, r2, #1
+; THUMB: and r2, r2, #1
+  %1 = call i32 @t0(i1 zeroext 1)
+; ARM: sxtb	r2, r1
+; ARM: mov r0, r2
+; THUMB: sxtb	r2, r1
+; THUMB: mov r0, r2
+  %2 = call i32 @t1(i8 signext %a)
+; ARM: uxtb	r2, r1
+; ARM: mov r0, r2
+; THUMB: uxtb	r2, r1
+; THUMB: mov r0, r2
+  %3 = call i32 @t2(i8 zeroext %a)
+; ARM: sxth	r2, r1
+; ARM: mov r0, r2
+; THUMB: sxth	r2, r1
+; THUMB: mov r0, r2
+  %4 = call i32 @t3(i16 signext %b)
+; ARM: uxth	r2, r1
+; ARM: mov r0, r2
+; THUMB: uxth	r2, r1
+; THUMB: mov r0, r2
+  %5 = call i32 @t4(i16 zeroext %b)
+
+;; A few test to check materialization
+;; Note: i1 1 was materialized with t1 call
+; ARM: movw r1, #255
+%6 = call i32 @t2(i8 zeroext 255)
+; ARM: movw r1, #65535
+; THUMB: movw r1, #65535
+%7 = call i32 @t4(i16 zeroext 65535)
+  ret void
+}
+
+define void @foo2() nounwind {
+  %1 = call signext i16 @t5()
+  %2 = call zeroext i16 @t6()
+  %3 = call signext i8 @t7()
+  %4 = call zeroext i8 @t8()
+  %5 = call zeroext i1 @t9()
+  ret void
+}
+
+declare signext i16 @t5();
+declare zeroext i16 @t6();
+declare signext i8 @t7();
+declare zeroext i8 @t8();
+declare zeroext i1 @t9();
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ARM: @t10
+; ARM: movw r0, #0
+; ARM: movw r1, #248
+; ARM: movw r2, #187
+; ARM: movw r3, #28
+; ARM: movw r9, #40
+; ARM: movw r12, #186
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: uxtb r2, r2
+; ARM: uxtb r3, r3
+; ARM: uxtb r9, r9
+; ARM: str r9, [sp]
+; ARM: uxtb r9, r12
+; ARM: str r9, [sp, #4]
+; ARM: bl _bar
+; THUMB: @t10
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: movs r1, #248
+; THUMB: movt r1, #0
+; THUMB: movs r2, #187
+; THUMB: movt r2, #0
+; THUMB: movs r3, #28
+; THUMB: movt r3, #0
+; THUMB: movw r9, #40
+; THUMB: movt r9, #0
+; THUMB: movw r12, #186
+; THUMB: movt r12, #0
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: uxtb r2, r2
+; THUMB: uxtb r3, r3
+; THUMB: uxtb.w r9, r9
+; THUMB: str.w r9, [sp]
+; THUMB: uxtb.w r9, r12
+; THUMB: str.w r9, [sp, #4]
+; THUMB: bl _bar
+  %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
+  ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
new file mode 100644
index 0000000..660156a
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1a(float %a) uwtable ssp {
+entry:
+; ARM: t1a
+; THUMB: t1a
+  %cmp = fcmp oeq float %a, 0.000000e+00
+; ARM: vcmpe.f32 s{{[0-9]+}}, #0
+; THUMB: vcmpe.f32 s{{[0-9]+}}, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @foo()
+
+; Shouldn't be able to encode -0.0 imm.
+define void @t1b(float %a) uwtable ssp {
+entry:
+; ARM: t1b
+; THUMB: t1b
+  %cmp = fcmp oeq float %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t2a(double %a) uwtable ssp {
+entry:
+; ARM: t2a
+; THUMB: t2a
+  %cmp = fcmp oeq double %a, 0.000000e+00
+; ARM: vcmpe.f64 d{{[0-9]+}}, #0
+; THUMB: vcmpe.f64 d{{[0-9]+}}, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; Shouldn't be able to encode -0.0 imm.
+define void @t2b(double %a) uwtable ssp {
+entry:
+; ARM: t2b
+; THUMB: t2b
+  %cmp = fcmp oeq double %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t4(i8 signext %a) uwtable ssp {
+entry:
+; ARM: t4
+; THUMB: t4
+  %cmp = icmp eq i8 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t5(i8 zeroext %a) uwtable ssp {
+entry:
+; ARM: t5
+; THUMB: t5
+  %cmp = icmp eq i8 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t6(i16 signext %a) uwtable ssp {
+entry:
+; ARM: t6
+; THUMB: t6
+  %cmp = icmp eq i16 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t7(i16 zeroext %a) uwtable ssp {
+entry:
+; ARM: t7
+; THUMB: t7
+  %cmp = icmp eq i16 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t8(i32 %a) uwtable ssp {
+entry:
+; ARM: t8
+; THUMB: t8
+  %cmp = icmp eq i32 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t9(i32 %a) uwtable ssp {
+entry:
+; ARM: t9
+; THUMB: t9
+  %cmp = icmp eq i32 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t10(i32 %a) uwtable ssp {
+entry:
+; ARM: t10
+; THUMB: t10
+  %cmp = icmp eq i32 %a, 384
+; ARM: cmp r{{[0-9]}}, #384
+; THUMB: cmp.w r{{[0-9]}}, #384
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t11(i32 %a) uwtable ssp {
+entry:
+; ARM: t11
+; THUMB: t11
+  %cmp = icmp eq i32 %a, 4096
+; ARM: cmp r{{[0-9]}}, #4096
+; THUMB: cmp.w r{{[0-9]}}, #4096
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t12(i8 %a) uwtable ssp {
+entry:
+; ARM: t12
+; THUMB: t12
+  %cmp = icmp ugt i8 %a, -113
+; ARM: cmp r{{[0-9]}}, #143
+; THUMB: cmp r{{[0-9]}}, #143
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; rdar://11038907
+; When comparing LONG_MIN/INT_MIN use a cmp instruction.
+define void @t13() nounwind ssp {
+entry:
+; ARM: t13
+; THUMB: t13
+  %cmp = icmp slt i32 -123, -2147483648
+; ARM: cmp r{{[0-9]}}, #-2147483648
+; THUMB: cmp.w r{{[0-9]}}, #-2147483648
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
new file mode 100644
index 0000000..686ccad
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test sitofp
+
+define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: sitofp_single_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
new file mode 100644
index 0000000..7e147c7
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Target-specific selector can't properly handle the double because it isn't
+; being passed via a register, so the materialized arguments become dead code.
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; THUMB: main
+  call void @printArgsNoRet(i32 1, float 0x4000CCCCC0000000, i8 signext 99, double 4.100000e+00)
+; THUMB: blx _printArgsNoRet
+; THUMB-NOT: ldr
+; THUMB-NOT: vldr
+; THUMB-NOT: vmov
+; THUMB-NOT: ldr
+; THUMB-NOT: sxtb
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: pop
+  ret i32 0
+}
+
+declare void @printArgsNoRet(i32 %a1, float %a2, i8 signext %a3, double %a4)
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
new file mode 100644
index 0000000..61bd185
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+
+define void @t1() nounwind uwtable ssp {
+; ARM: t1
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t1
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+  %1 = load i8* @a, align 1
+  call void @foo1(i8 zeroext %1)
+  ret void
+}
+
+define void @t2() nounwind uwtable ssp {
+; ARM: t2
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t2
+; THUMB: ldrh
+; THUMB-NOT: uxth
+  %1 = load i16* @b, align 2
+  call void @foo2(i16 zeroext %1)
+  ret void
+}
+
+declare void @foo1(i8 zeroext)
+declare void @foo2(i16 zeroext)
+
+define i32 @t3() nounwind uwtable ssp {
+; ARM: t3
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t3
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+  %1 = load i8* @a, align 1
+  %2 = zext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t4() nounwind uwtable ssp {
+; ARM: t4
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t4
+; THUMB: ldrh
+; THUMB-NOT: uxth
+  %1 = load i16* @b, align 2
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t5() nounwind uwtable ssp {
+; ARM: t5
+; ARM: ldrsh
+; ARM-NOT: sxth
+; THUMB: t5
+; THUMB: ldrsh
+; THUMB-NOT: sxth
+  %1 = load i16* @b, align 2
+  %2 = sext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t6() nounwind uwtable ssp {
+; ARM: t6
+; ARM: ldrsb
+; ARM-NOT: sxtb
+; THUMB: t6
+; THUMB: ldrsb
+; THUMB-NOT: sxtb
+  %1 = load i8* @a, align 2
+  %2 = sext i8 %1 to i32
+  ret i32 %2
+}
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
new file mode 100644
index 0000000..8764bef
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
+entry:
+; ARM: icmp_i16_unsigned
+; ARM: uxth r0, r0
+; ARM: uxth r1, r1
+; ARM: cmp	r0, r1
+; THUMB: icmp_i16_unsigned
+; THUMB: uxth r0, r0
+; THUMB: uxth r1, r1
+; THUMB: cmp	r0, r1
+  %cmp = icmp ult i16 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
+define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
+entry:
+; ARM: icmp_i8_signed
+; ARM: sxtb r0, r0
+; ARM: sxtb r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i8_signed
+; THUMB: sxtb r0, r0
+; THUMB: sxtb r1, r1
+; THUMB: cmp r0, r1
+  %cmp = icmp sgt i8 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
+define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind {
+entry:
+; ARM: icmp_i1_unsigned
+; ARM: and r0, r0, #1
+; ARM: and r1, r1, #1
+; ARM: cmp r0, r1
+; THUMB: icmp_i1_unsigned
+; THUMB: and r0, r0, #1
+; THUMB: and r1, r1, #1
+; THUMB: cmp r0, r1
+  %cmp = icmp ult i1 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 0000000..be8035e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
new file mode 100644
index 0000000..e6bdfa7
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
+@temp = common global [60 x i8] zeroinitializer, align 1
+
+define void @t1() nounwind ssp {
+; ARM: t1
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
+; ARM: add r0, r0, #5
+; ARM: movw r1, #64
+; ARM: movw r2, #10
+; ARM: uxtb r1, r1
+; ARM: bl _memset
+; THUMB: t1
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
+; THUMB: adds r0, #5
+; THUMB: movs r1, #64
+; THUMB: movt r1, #0
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: uxtb r1, r1
+; THUMB: bl _memset
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t2() nounwind ssp {
+; ARM: t2
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #17
+; ARM: str r0, [sp]                @ 4-byte Spill
+; ARM: mov r0, r1
+; ARM: ldr r1, [sp]                @ 4-byte Reload
+; ARM: bl _memcpy
+; THUMB: t2
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #17
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memcpy
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t3() nounwind ssp {
+; ARM: t3
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #10
+; ARM: mov r0, r1
+; ARM: bl _memmove
+; THUMB: t3
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memmove
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+define void @t4() nounwind ssp {
+; ARM: t4
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
new file mode 100644
index 0000000..dfb8c53
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t1
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 1
+  %0 = load i32* %add.ptr, align 4
+; ARM: ldr r{{[0-9]}}, [r0, #4]
+  ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t2
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 63
+  %0 = load i32* %add.ptr, align 4
+; ARM: ldr.w r{{[0-9]}}, [r0, #252]
+  ret i32 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t3
+  %add.ptr = getelementptr inbounds i16* %ptr, i16 1
+  %0 = load i16* %add.ptr, align 4
+; ARM: ldrh r{{[0-9]}}, [r0, #2]
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t4
+  %add.ptr = getelementptr inbounds i16* %ptr, i16 63
+  %0 = load i16* %add.ptr, align 4
+; ARM: ldrh.w r{{[0-9]}}, [r0, #126]
+  ret i16 %0
+}
+
+define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t5
+  %add.ptr = getelementptr inbounds i8* %ptr, i8 1
+  %0 = load i8* %add.ptr, align 4
+; ARM: ldrb r{{[0-9]}}, [r0, #1]
+  ret i8 %0
+}
+
+define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t6
+  %add.ptr = getelementptr inbounds i8* %ptr, i8 63
+  %0 = load i8* %add.ptr, align 4
+; ARM: ldrb.w r{{[0-9]}}, [r0, #63]
+  ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
new file mode 100644
index 0000000..2a88678
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -0,0 +1,168 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t1
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-4]
+  ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t2
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-252]
+  ret i32 %0
+}
+
+define i32 @t3(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t3
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0]
+  ret i32 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t4
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
+  ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t5
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
+  ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t6
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0]
+  ret i16 %0
+}
+
+define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t7
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
+  ret i8 %0
+}
+
+define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t8
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
+  ret i8 %0
+}
+
+define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t9
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0]
+  ret i8 %0
+}
+
+define void @t10(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t10
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-4]
+  ret void
+}
+
+define void @t11(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t11
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-252]
+  ret void
+}
+
+define void @t12(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t12
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t13(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t13
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-2]
+  ret void
+}
+
+define void @t14(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t14
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-254]
+  ret void
+}
+
+define void @t15(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t15
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t16(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t16
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-1]
+  ret void
+}
+
+define void @t17(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t17
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-255]
+  ret void
+}
+
+define void @t18(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t18
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0]
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
new file mode 100644
index 0000000..e8cc2b2
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; rdar://10418009
+
+define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t1
+  %add.ptr = getelementptr inbounds i16* %a, i64 -8
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-16]
+  ret i16 %0
+}
+
+define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t2
+  %add.ptr = getelementptr inbounds i16* %a, i64 -16
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-32]
+  ret i16 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t3
+  %add.ptr = getelementptr inbounds i16* %a, i64 -127
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-254]
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t4
+  %add.ptr = getelementptr inbounds i16* %a, i64 -128
+  %0 = load i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrh r0, [r0]
+  ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t5
+  %add.ptr = getelementptr inbounds i16* %a, i64 8
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #16]
+  ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t6
+  %add.ptr = getelementptr inbounds i16* %a, i64 16
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #32]
+  ret i16 %0
+}
+
+define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t7
+  %add.ptr = getelementptr inbounds i16* %a, i64 127
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #254]
+  ret i16 %0
+}
+
+define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t8
+  %add.ptr = getelementptr inbounds i16* %a, i64 128
+  %0 = load i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: ldrh r0, [r0]
+  ret i16 %0
+}
+
+define void @t9(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t9
+  %add.ptr = getelementptr inbounds i16* %a, i64 -8
+  store i16 0, i16* %add.ptr, align 2
+; ARM: strh	r1, [r0, #-16]
+  ret void
+}
+
+; mvn r1, #255
+; strh r2, [r0, r1]
+define void @t10(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t10
+  %add.ptr = getelementptr inbounds i16* %a, i64 -128
+  store i16 0, i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: strh r{{[1-9]}}, [r0]
+  ret void
+}
+
+define void @t11(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t11
+  %add.ptr = getelementptr inbounds i16* %a, i64 8
+  store i16 0, i16* %add.ptr, align 2
+; ARM strh r{{[1-9]}}, [r0, #16]
+  ret void
+}
+
+; mov r1, #256
+; strh r2, [r0, r1]
+define void @t12(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t12
+  %add.ptr = getelementptr inbounds i16* %a, i64 128
+  store i16 0, i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: strh r{{[1-9]}}, [r0]
+  ret void
+}
+
+define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t13
+  %add.ptr = getelementptr inbounds i8* %a, i64 -8
+  %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-8]
+  ret i8 %0
+}
+
+define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t14
+  %add.ptr = getelementptr inbounds i8* %a, i64 -255
+  %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-255]
+  ret i8 %0
+}
+
+define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t15
+  %add.ptr = getelementptr inbounds i8* %a, i64 -256
+  %0 = load i8* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrsb r0, [r0]
+  ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
new file mode 100644
index 0000000..b180e43
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; rdar://10412592
+
+; Note: The Thumb code is being generated by the target-independent selector.
+
+define void @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+; ARM: mvn r0, #0
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #65535
+  call void @foo(i32 -1)
+  ret void
+}
+
+declare void @foo(i32)
+
+define void @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+; ARM: mvn r0, #233
+; THUMB: movw r0, #65302
+; THUMB: movt r0, #65535
+  call void @foo(i32 -234)
+  ret void
+}
+
+define void @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+; ARM: mvn	r0, #256
+; THUMB: movw r0, #65279
+; THUMB: movt r0, #65535
+  call void @foo(i32 -257)
+  ret void
+}
+
+; Load from constant pool
+define void @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+; ARM: ldr	r0
+; THUMB: movw r0, #65278
+; THUMB: movt r0, #65535
+  call void @foo(i32 -258)
+  ret void
+}
+
+define void @t5() nounwind {
+entry:
+; ARM: t5
+; THUMB: t5
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+  call void @foo(i32 -65281)
+  ret void
+}
+
+define void @t6() nounwind {
+entry:
+; ARM: t6
+; THUMB: t6
+; ARM: mvn r0, #978944
+; THUMB: movw r0, #4095
+; THUMB: movt r0, #65521
+  call void @foo(i32 -978945)
+  ret void
+}
+
+define void @t7() nounwind {
+entry:
+; ARM: t7
+; THUMB: t7
+; ARM: mvn r0, #267386880
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #61455
+  call void @foo(i32 -267386881)
+  ret void
+}
+
+define void @t8() nounwind {
+entry:
+; ARM: t8
+; THUMB: t8
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+  call void @foo(i32 -65281)
+  ret void
+}
+
+define void @t9() nounwind {
+entry:
+; ARM: t9
+; THUMB: t9
+; ARM: mvn r0, #2130706432
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #33023
+  call void @foo(i32 -2130706433)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 08dcc64..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
new file mode 100644
index 0000000..689b169
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Sign-extend of i1 currently not supported by fast-isel
+;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
+;entry:
+;  ret i1 %a
+;}
+
+define zeroext i1 @ret1(i1 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret1
+; CHECK: and r0, r0, #1
+; CHECK: bx lr
+  ret i1 %a
+}
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret2
+; CHECK: sxtb r0, r0
+; CHECK: bx lr
+  ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret3
+; CHECK: uxtb r0, r0
+; CHECK: bx lr
+  ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret4
+; CHECK: sxth r0, r0
+; CHECK: bx lr
+  ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret5
+; CHECK: uxth r0, r0
+; CHECK: bx lr
+  ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+  ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
new file mode 100644
index 0000000..b83a733
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i1 %c) nounwind readnone {
+entry:
+; ARM: t1
+; ARM: movw r{{[1-9]}}, #10
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t1
+; THUMB: movs r{{[1-9]}}, #10
+; THUMB: movt r{{[1-9]}}, #0
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 10, i32 20
+  ret i32 %0
+}
+
+define i32 @t2(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t2
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t2
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 20
+  ret i32 %0
+}
+
+define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
+entry:
+; ARM: t3
+; ARM: cmp r0, #0
+; ARM: movne r{{[1-9]}}, r{{[1-9]}}
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t3
+; THUMB: cmp r0, #0
+; THUMB: it ne
+; THUMB: movne r{{[1-9]}}, r{{[1-9]}}
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 %b
+  ret i32 %0
+}
+
+define i32 @t4(i1 %c) nounwind readnone {
+entry:
+; ARM: t4
+; ARM: mvn r{{[1-9]}}, #9
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #0
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t4
+; THUMB: movw r{{[1-9]}}, #65526
+; THUMB: movt r{{[1-9]}}, #65535
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #0
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 -10, i32 -1
+  ret i32 %0
+}
+
+define i32 @t5(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t5
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #1
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t5
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #1
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 -2
+  ret i32 %0
+}
+
+; Check one large negative immediates.
+define i32 @t6(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t6
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #978944
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t6
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #978944
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 -978945
+  ret i32 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 465e85f..417e2d9 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
 define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,19 +142,87 @@ define void @test4() {
   store i32 %b, i32* @test4g
   ret void
 
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
 
-; ARM: ldr r0, LCPI4_1
-; ARM: ldr r0, [r0]
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
+}
+
+; Check unaligned stores
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+  %add = fadd float %x, %y
+  %0 = load %struct.anon** @a, align 4
+  %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+  store float %add, float* %x1, align 1
+  ret void
+}
+
+; Doublewords require only word-alignment.
+; rdar://10528060
+%struct.anon.0 = type { double }
+
+@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
+
+define void @test5(double %a, double %b) nounwind {
+entry:
+; ARM: @test5
+; THUMB: @test5
+  %add = fadd double %a, %b
+  store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+; ARM: vstr d16, [r0]
+; THUMB: vstr d16, [r0]
+  ret void
+}
+
+; Check unaligned loads of floats
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @test6
+; THUMB: @test6
+  %0 = alloca %class.TAlignTest*, align 4
+  store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+  %1 = load %class.TAlignTest** %0
+  %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+  %3 = load float* %2, align 1
+  %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r0, [r0, #2]
+; ARM: vmov s0, r0
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r0, [r0, #2]
+; THUMB: vmov s0, r0
+; THUMB: vcmpe.f32 s0, #0
+  ret i1 %4
+}
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
 }
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9..27fa2b0 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
@@ -40,28 +40,12 @@ entry:
   ret double %1
 }
 
-; rdar://9059537
-define i32 @test4() ssp {
-entry:
-; SOFT: test4:
-; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00
-; This S-reg must be the first sub-reg of the last D-reg on vbsl.
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]]
-; SOFT: vshr.u64 [[REG4]], [[REG4]], #32
-; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000
-; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}}
-  %call80 = tail call double @copysign(double 1.000000e+00, double undef)
-  %conv81 = fptrunc double %call80 to float
-  %tmp88 = bitcast float %conv81 to i32
-  ret i32 %tmp88
-}
-
 ; rdar://9287902
-define float @test5() nounwind {
+define float @test4() nounwind {
 entry:
-; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
+; SOFT: test4:
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
 ; SOFT: vbsl [[REG6]], [[REG7]], 
   %0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
index 227e4e8..1ba561d 100644
--- a/test/CodeGen/ARM/fold-const.ll
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -3,7 +3,7 @@
 define i32 @f(i32 %a) nounwind readnone optsize ssp {
 entry:
   %conv = zext i32 %a to i64
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv, i1 true)
 ; CHECK: clz
 ; CHECK-NOT: adds
   %cast = trunc i64 %tmp1 to i32
@@ -11,4 +11,4 @@ entry:
   ret i32 %sub
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ac023d1..93601cf 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -42,7 +42,7 @@ entry:
 
 define double @h(double* %v) {
 ;CHECK: h:
-;CHECK: vldr.64
+;CHECK: vldr
 ;CHECK-NEXT: vmov
 entry:
         %tmp = load double* %v          ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 7c0dd0e..2d8f710 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
 ; rdar://7461510
+; rdar://10964603
 
+; Disable this optimization unless we know one of them is zero.
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr.32 s0,
-; NAN: vldr.32 s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: beq
   %0 = load float* %a
   %1 = load float* %b
   %2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
   ret i32 %4
 }
 
+; If one side is zero, the other size sign bit is masked off to allow
+; +0.0 == -0.0
 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load double* %a
   %1 = fcmp oeq double %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
 
 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load float* %a
   %1 = fcmp oeq float %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 2e6b3e3..4a4c5b1 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -5,7 +5,7 @@ define i32 @f7(float %a, float %b) {
 entry:
 ; CHECK: f7:
 ; CHECK: vcmpe.f32
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: movweq
 ; CHECK-NOT: vmrs
 ; CHECK: movwvs
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 3833933..8faa578 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
 
 define float @f2(float* %v, float %u) {
 ; CHECK: f2:
-; CHECK: vldr.32{{.*}}[
+; CHECK: vldr{{.*}}[
         %tmp = load float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
 
 define float @f2offset(float* %v, float %u) {
 ; CHECK: f2offset:
-; CHECK: vldr.32{{.*}}, #4]
+; CHECK: vldr{{.*}}, #4]
         %addr = getelementptr float* %v, i32 1
         %tmp = load float* %addr
         %tmp1 = fadd float %tmp, %u
@@ -25,7 +25,7 @@ define float @f2offset(float* %v, float %u) {
 
 define float @f2noffset(float* %v, float %u) {
 ; CHECK: f2noffset:
-; CHECK: vldr.32{{.*}}, #-4]
+; CHECK: vldr{{.*}}, #-4]
         %addr = getelementptr float* %v, i32 -1
         %tmp = load float* %addr
         %tmp1 = fadd float %tmp, %u
@@ -34,7 +34,7 @@ define float @f2noffset(float* %v, float %u) {
 
 define void @f3(float %a, float %b, float* %v) {
 ; CHECK: f3:
-; CHECK: vstr.32{{.*}}[
+; CHECK: vstr{{.*}}[
         %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
         ret void
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 0000000..a8b3999
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fadd double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fadd float %1, %f3
+  ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+  %1 = fmul double %d2, %d3
+  %2 = fsub double %d1, %1
+  ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+  %1 = fmul float %f2, %f3
+  %2 = fsub float %f1, %1
+  ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double -0.0, %1
+  %3 = fsub double %2, %d3
+  ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float -0.0, %1
+  %3 = fsub float %2, %f3
+  ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float %1, %f3
+  ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+  %mul = fmul <2 x float> %a, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+  %mul = fmul <2 x float> %a, %b
+  %sub = fsub <2 x float> %a, %mul
+  ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %a, %mul
+  ret <4 x float> %sub
+}
+
+define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f32
+; CHECK: vfma.f32
+  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+  ret float %tmp1
+}
+
+define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f64
+; CHECK: vfma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  ret double %tmp1
+}
+
+define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_v2f32
+; CHECK: vfma.f32
+  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
+  ret <2 x float> %tmp1
+}
+
+define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64_2
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64_2
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64
+; CHECK: vfnma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  %tmp2 = fsub double -0.0, %tmp1
+  ret double %tmp2
+}
+
+define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64_2
+; CHECK: vfnma.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = fsub double -0.0, %c
+  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
+  ret double %tmp3
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 28bf221..1732df3 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -14,7 +14,7 @@
 ; offset.  Having the starting offset in range is not sufficient.
 ; When this works properly, @g3 is placed in a separate chunk of merged globals.
 ; CHECK: _MergedGlobals1:
-@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4
 
 ; Global variables that can be placed in BSS should be kept together in a
 ; separate pool of merged globals.
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 5e7e3f2..eb71149 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -70,6 +70,5 @@ define i32 @test1() {
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI0_0:
 ; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
-; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI0_1:
 ; LinuxPIC:     .long	G(GOT)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae0..893b426 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
 ; RUN:   grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05..cd870bb 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true
 
 cond_true:
+; CHECK: subeq r0, r1, #1
 	%tmp5 = add i32 %b, 1
 	ret i32 %tmp5
 
 cond_false:
+; CHECK: addne r0, r1, #1
 	%tmp7 = add i32 %b, -1
 	ret i32 %tmp7
 }
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bf..a5082d8 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; rdar://8402126
 ; Make sure if-converter is not predicating vldmia and ldmia. These are
 ; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index 63f8557..0f142ee 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -18,7 +18,7 @@ bb.nph:                                           ; preds = %entry
 
 bb:                                               ; preds = %bb4, %bb.nph
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
   %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
   %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
@@ -33,7 +33,7 @@ bb1:                                              ; preds = %bb
 ; CHECK-NOT: vcmpemi
 ; CHECK-NOT: vmrsmi
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
   %6 = load double* %scevgep12, align 4
   %7 = fcmp uge double %3, %6
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578..eef4de0 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
 	switch i32 %c, label %cond_next [
 		 i32 1, label %cond_true
 		 i32 7, label %cond_true
 	]
 
 cond_true:
+; CHECK: addne r0
+; CHECK: bxne
 	%tmp12 = add i32 %a, 1
 	%tmp1518 = add i32 %tmp12, %b
 	ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055..95f5c97 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 2327657..a00deda 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index cb5243c..2fcc45f 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -110,3 +110,13 @@ entry:
   call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
   ret void
 }
+
+; Radar 10551006
+
+define <4 x i32> @t11(i32* %p) nounwind {
+entry:
+; CHECK: t11
+; CHECK: vld1.s32 {d16[], d17[]}, [r0]
+  %0 = tail call <4 x i32> asm "vld1.s32 {${0:e}[], ${0:f}[]}, [$1]", "=w,r"(i32* %p) nounwind
+  ret <4 x i32> %0
+}
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d32322..d188fae 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
 ; RUN:   grep mov | count 3
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd-memoper.ll b/test/CodeGen/ARM/ldrd-memoper.ll
new file mode 100644
index 0000000..f1a1121
--- /dev/null
+++ b/test/CodeGen/ARM/ldrd-memoper.ll
@@ -0,0 +1,15 @@
+; RUN: llc %s -o /dev/null -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -debug-only=arm-ldst-opt 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; PR8113: ARMLoadStoreOptimizer must preserve memoperands.
+
+@b = external global i64*
+
+; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+	%0 = load i64** @b, align 4
+	%1 = load i64* %0, align 4
+	%2 = mul i64 %1, %a
+	ret i64 %2
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 8010f20..3f8fd75 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,24 +1,70 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI
-; rdar://r6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
 
-; Magic ARM pair hints works best with linearscan.
+; Magic ARM pair hints works best with linearscan / fast.
+
+; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
+; register when interrupted or faulted.
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;V6:   ldrd r2, r3, [r2]
-
-;V5:   ldr r{{[0-9]+}}, [r2]
-;V5:   ldr r{{[0-9]+}}, [r2, #4]
+; A8: t:
+; A8:   ldrd r2, r3, [r2]
 
-;EABI: ldr r{{[0-9]+}}, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2, #4]
+; M3: t:
+; M3-NOT: ldrd
+; M3: ldm.w r2, {r2, r3}
 
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability. Whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather then
+; evict another live range or use callee saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldrd
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+  %0 = add nsw i32 %n, -1                         ; <i32> [#uses=2]
+  %1 = icmp sgt i32 %0, 0                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb:                                               ; preds = %bb, %entry
+  %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ]    ; <i32> [#uses=3]
+  %scevgep = getelementptr i32* %a, i32 %i.03     ; <i32*> [#uses=1]
+  %scevgep4 = getelementptr i32* %b, i32 %i.03    ; <i32*> [#uses=1]
+  %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
+  %scevgep5 = getelementptr i32* %a, i32 %tmp     ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
+  %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
+  store i32 %4, i32* %scevgep4, align 4
+  %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 0000000..bdd4081
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+  %tmp2 = load i8* %call
+  %tmp3 = trunc i8 %tmp2 to i1
+  %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+  store double %cond, double* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 0000000..8068abd
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: bl log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: bl exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index d5aac2e..a99a7ec 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -25,8 +25,8 @@ define i32 @f2(i64 %x, i64 %y) {
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
-; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: asrge   r0, r1, r2
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -38,8 +38,8 @@ define i32 @f3(i64 %x, i64 %y) {
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
-; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: lsrge   r0, r1, r2
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
new file mode 100644
index 0000000..5283f57
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LSR should compare against the post-incremented induction variable.
+; In this case, the immediate value is -2 which requires a cmn instruction.
+;
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
+; CHECK: cmn{{.*}}[[IV]], #2
+; CHECK: bne
+define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
+entry:
+  %cmp3 = icmp eq i32 %i, -2
+  br i1 %cmp3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ]
+  %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
+  %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, %b.04
+  %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
+  %sub = add nsw i32 %i.addr.05, -2
+  %cmp = icmp eq i32 %i.addr.05, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
deleted file mode 100644
index 4737901..0000000
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ /dev/null
@@ -1,640 +0,0 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8  -enable-lsr-nested < %s | FileCheck %s
-
-; LSR should recognize that this is an unrolled loop which can use
-; constant offset addressing, so that each of the following stores
-; uses the same register.
-
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
-
-; We can also save a register in the outer loop, but that requires
-; performing LSR on the outer loop.
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-
-%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
-%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
-%2 = type { %1*, %3*, %6*, i8*, i32, i32 }
-%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
-%4 = type opaque
-%5 = type opaque
-%6 = type { void (%2*)*, i32, i32, i32, i32 }
-%7 = type { [8 x i32], [12 x i32] }
-%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
-%9 = type { [64 x i16], i32 }
-%10 = type { [17 x i8], [256 x i8], i32 }
-%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
-%12 = type { %12*, i8, i32, i32, i8* }
-%13 = type { void (%0*)*, void (%0*)*, i32 }
-%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
-%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
-%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
-%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
-%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
-%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
-%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
-%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
-%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
-%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
-
-define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
-bb:
-  %t = alloca [64 x float], align 4
-  %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
-  %t6 = load i8** %t5, align 4
-  %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
-  %t8 = load i8** %t7, align 4
-  br label %bb9
-
-bb9:
-  %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
-  %t11 = add i32 %t10, 8
-  %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
-  %t13 = add i32 %t10, 16
-  %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
-  %t15 = add i32 %t10, 24
-  %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
-  %t17 = add i32 %t10, 32
-  %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
-  %t19 = add i32 %t10, 40
-  %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
-  %t21 = add i32 %t10, 48
-  %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
-  %t23 = add i32 %t10, 56
-  %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
-  %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
-  %t26 = shl i32 %t10, 5
-  %t27 = or i32 %t26, 8
-  %t28 = getelementptr i8* %t8, i32 %t27
-  %t29 = bitcast i8* %t28 to float*
-  %t30 = or i32 %t26, 16
-  %t31 = getelementptr i8* %t8, i32 %t30
-  %t32 = bitcast i8* %t31 to float*
-  %t33 = or i32 %t26, 24
-  %t34 = getelementptr i8* %t8, i32 %t33
-  %t35 = bitcast i8* %t34 to float*
-  %t36 = or i32 %t26, 4
-  %t37 = getelementptr i8* %t8, i32 %t36
-  %t38 = bitcast i8* %t37 to float*
-  %t39 = or i32 %t26, 12
-  %t40 = getelementptr i8* %t8, i32 %t39
-  %t41 = bitcast i8* %t40 to float*
-  %t42 = or i32 %t26, 20
-  %t43 = getelementptr i8* %t8, i32 %t42
-  %t44 = bitcast i8* %t43 to float*
-  %t45 = or i32 %t26, 28
-  %t46 = getelementptr i8* %t8, i32 %t45
-  %t47 = bitcast i8* %t46 to float*
-  %t48 = getelementptr i8* %t8, i32 %t26
-  %t49 = bitcast i8* %t48 to float*
-  %t50 = shl i32 %t10, 3
-  %t51 = or i32 %t50, 1
-  %t52 = getelementptr i16* %a2, i32 %t51
-  %t53 = or i32 %t50, 2
-  %t54 = getelementptr i16* %a2, i32 %t53
-  %t55 = or i32 %t50, 3
-  %t56 = getelementptr i16* %a2, i32 %t55
-  %t57 = or i32 %t50, 4
-  %t58 = getelementptr i16* %a2, i32 %t57
-  %t59 = or i32 %t50, 5
-  %t60 = getelementptr i16* %a2, i32 %t59
-  %t61 = or i32 %t50, 6
-  %t62 = getelementptr i16* %a2, i32 %t61
-  %t63 = or i32 %t50, 7
-  %t64 = getelementptr i16* %a2, i32 %t63
-  %t65 = getelementptr i16* %a2, i32 %t50
-  %t66 = load i16* %t52, align 2
-  %t67 = icmp eq i16 %t66, 0
-  %t68 = load i16* %t54, align 2
-  %t69 = icmp eq i16 %t68, 0
-  %t70 = and i1 %t67, %t69
-  br i1 %t70, label %bb71, label %bb91
-
-bb71:
-  %t72 = load i16* %t56, align 2
-  %t73 = icmp eq i16 %t72, 0
-  br i1 %t73, label %bb74, label %bb91
-
-bb74:
-  %t75 = load i16* %t58, align 2
-  %t76 = icmp eq i16 %t75, 0
-  br i1 %t76, label %bb77, label %bb91
-
-bb77:
-  %t78 = load i16* %t60, align 2
-  %t79 = icmp eq i16 %t78, 0
-  br i1 %t79, label %bb80, label %bb91
-
-bb80:
-  %t81 = load i16* %t62, align 2
-  %t82 = icmp eq i16 %t81, 0
-  br i1 %t82, label %bb83, label %bb91
-
-bb83:
-  %t84 = load i16* %t64, align 2
-  %t85 = icmp eq i16 %t84, 0
-  br i1 %t85, label %bb86, label %bb91
-
-bb86:
-  %t87 = load i16* %t65, align 2
-  %t88 = sitofp i16 %t87 to float
-  %t89 = load float* %t49, align 4
-  %t90 = fmul float %t88, %t89
-  store float %t90, float* %t25, align 4
-  store float %t90, float* %t12, align 4
-  store float %t90, float* %t14, align 4
-  store float %t90, float* %t16, align 4
-  store float %t90, float* %t18, align 4
-  store float %t90, float* %t20, align 4
-  store float %t90, float* %t22, align 4
-  store float %t90, float* %t24, align 4
-  br label %bb156
-
-bb91:
-  %t92 = load i16* %t65, align 2
-  %t93 = sitofp i16 %t92 to float
-  %t94 = load float* %t49, align 4
-  %t95 = fmul float %t93, %t94
-  %t96 = sitofp i16 %t68 to float
-  %t97 = load float* %t29, align 4
-  %t98 = fmul float %t96, %t97
-  %t99 = load i16* %t58, align 2
-  %t100 = sitofp i16 %t99 to float
-  %t101 = load float* %t32, align 4
-  %t102 = fmul float %t100, %t101
-  %t103 = load i16* %t62, align 2
-  %t104 = sitofp i16 %t103 to float
-  %t105 = load float* %t35, align 4
-  %t106 = fmul float %t104, %t105
-  %t107 = fadd float %t95, %t102
-  %t108 = fsub float %t95, %t102
-  %t109 = fadd float %t98, %t106
-  %t110 = fsub float %t98, %t106
-  %t111 = fmul float %t110, 0x3FF6A09E60000000
-  %t112 = fsub float %t111, %t109
-  %t113 = fadd float %t107, %t109
-  %t114 = fsub float %t107, %t109
-  %t115 = fadd float %t108, %t112
-  %t116 = fsub float %t108, %t112
-  %t117 = sitofp i16 %t66 to float
-  %t118 = load float* %t38, align 4
-  %t119 = fmul float %t117, %t118
-  %t120 = load i16* %t56, align 2
-  %t121 = sitofp i16 %t120 to float
-  %t122 = load float* %t41, align 4
-  %t123 = fmul float %t121, %t122
-  %t124 = load i16* %t60, align 2
-  %t125 = sitofp i16 %t124 to float
-  %t126 = load float* %t44, align 4
-  %t127 = fmul float %t125, %t126
-  %t128 = load i16* %t64, align 2
-  %t129 = sitofp i16 %t128 to float
-  %t130 = load float* %t47, align 4
-  %t131 = fmul float %t129, %t130
-  %t132 = fadd float %t127, %t123
-  %t133 = fsub float %t127, %t123
-  %t134 = fadd float %t119, %t131
-  %t135 = fsub float %t119, %t131
-  %t136 = fadd float %t134, %t132
-  %t137 = fsub float %t134, %t132
-  %t138 = fmul float %t137, 0x3FF6A09E60000000
-  %t139 = fadd float %t133, %t135
-  %t140 = fmul float %t139, 0x3FFD906BC0000000
-  %t141 = fmul float %t135, 0x3FF1517A80000000
-  %t142 = fsub float %t141, %t140
-  %t143 = fmul float %t133, 0xC004E7AEA0000000
-  %t144 = fadd float %t143, %t140
-  %t145 = fsub float %t144, %t136
-  %t146 = fsub float %t138, %t145
-  %t147 = fadd float %t142, %t146
-  %t148 = fadd float %t113, %t136
-  store float %t148, float* %t25, align 4
-  %t149 = fsub float %t113, %t136
-  store float %t149, float* %t24, align 4
-  %t150 = fadd float %t115, %t145
-  store float %t150, float* %t12, align 4
-  %t151 = fsub float %t115, %t145
-  store float %t151, float* %t22, align 4
-  %t152 = fadd float %t116, %t146
-  store float %t152, float* %t14, align 4
-  %t153 = fsub float %t116, %t146
-  store float %t153, float* %t20, align 4
-  %t154 = fadd float %t114, %t147
-  store float %t154, float* %t18, align 4
-  %t155 = fsub float %t114, %t147
-  store float %t155, float* %t16, align 4
-  br label %bb156
-
-bb156:
-  %t157 = add i32 %t10, 1
-  %t158 = icmp eq i32 %t157, 8
-  br i1 %t158, label %bb159, label %bb9
-
-bb159:
-  %t160 = add i32 %a4, 7
-  %t161 = add i32 %a4, 1
-  %t162 = add i32 %a4, 6
-  %t163 = add i32 %a4, 2
-  %t164 = add i32 %a4, 5
-  %t165 = add i32 %a4, 4
-  %t166 = add i32 %a4, 3
-  br label %bb167
-
-bb167:
-  %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
-  %t169 = getelementptr i8** %a3, i32 %t168
-  %t170 = shl i32 %t168, 3
-  %t171 = or i32 %t170, 4
-  %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
-  %t173 = or i32 %t170, 2
-  %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
-  %t175 = or i32 %t170, 6
-  %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
-  %t177 = or i32 %t170, 5
-  %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
-  %t179 = or i32 %t170, 3
-  %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
-  %t181 = or i32 %t170, 1
-  %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
-  %t183 = or i32 %t170, 7
-  %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
-  %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
-  %t186 = load i8** %t169, align 4
-  %t187 = getelementptr inbounds i8* %t186, i32 %a4
-  %t188 = load float* %t185, align 4
-  %t189 = load float* %t172, align 4
-  %t190 = fadd float %t188, %t189
-  %t191 = fsub float %t188, %t189
-  %t192 = load float* %t174, align 4
-  %t193 = load float* %t176, align 4
-  %t194 = fadd float %t192, %t193
-  %t195 = fsub float %t192, %t193
-  %t196 = fmul float %t195, 0x3FF6A09E60000000
-  %t197 = fsub float %t196, %t194
-  %t198 = fadd float %t190, %t194
-  %t199 = fsub float %t190, %t194
-  %t200 = fadd float %t191, %t197
-  %t201 = fsub float %t191, %t197
-  %t202 = load float* %t178, align 4
-  %t203 = load float* %t180, align 4
-  %t204 = fadd float %t202, %t203
-  %t205 = fsub float %t202, %t203
-  %t206 = load float* %t182, align 4
-  %t207 = load float* %t184, align 4
-  %t208 = fadd float %t206, %t207
-  %t209 = fsub float %t206, %t207
-  %t210 = fadd float %t208, %t204
-  %t211 = fsub float %t208, %t204
-  %t212 = fmul float %t211, 0x3FF6A09E60000000
-  %t213 = fadd float %t205, %t209
-  %t214 = fmul float %t213, 0x3FFD906BC0000000
-  %t215 = fmul float %t209, 0x3FF1517A80000000
-  %t216 = fsub float %t215, %t214
-  %t217 = fmul float %t205, 0xC004E7AEA0000000
-  %t218 = fadd float %t217, %t214
-  %t219 = fsub float %t218, %t210
-  %t220 = fsub float %t212, %t219
-  %t221 = fadd float %t216, %t220
-  %t222 = fadd float %t198, %t210
-  %t223 = fptosi float %t222 to i32
-  %t224 = add nsw i32 %t223, 4
-  %t225 = lshr i32 %t224, 3
-  %t226 = and i32 %t225, 1023
-  %t227 = add i32 %t226, 128
-  %t228 = getelementptr inbounds i8* %t6, i32 %t227
-  %t229 = load i8* %t228, align 1
-  store i8 %t229, i8* %t187, align 1
-  %t230 = fsub float %t198, %t210
-  %t231 = fptosi float %t230 to i32
-  %t232 = add nsw i32 %t231, 4
-  %t233 = lshr i32 %t232, 3
-  %t234 = and i32 %t233, 1023
-  %t235 = add i32 %t234, 128
-  %t236 = getelementptr inbounds i8* %t6, i32 %t235
-  %t237 = load i8* %t236, align 1
-  %t238 = getelementptr inbounds i8* %t186, i32 %t160
-  store i8 %t237, i8* %t238, align 1
-  %t239 = fadd float %t200, %t219
-  %t240 = fptosi float %t239 to i32
-  %t241 = add nsw i32 %t240, 4
-  %t242 = lshr i32 %t241, 3
-  %t243 = and i32 %t242, 1023
-  %t244 = add i32 %t243, 128
-  %t245 = getelementptr inbounds i8* %t6, i32 %t244
-  %t246 = load i8* %t245, align 1
-  %t247 = getelementptr inbounds i8* %t186, i32 %t161
-  store i8 %t246, i8* %t247, align 1
-  %t248 = fsub float %t200, %t219
-  %t249 = fptosi float %t248 to i32
-  %t250 = add nsw i32 %t249, 4
-  %t251 = lshr i32 %t250, 3
-  %t252 = and i32 %t251, 1023
-  %t253 = add i32 %t252, 128
-  %t254 = getelementptr inbounds i8* %t6, i32 %t253
-  %t255 = load i8* %t254, align 1
-  %t256 = getelementptr inbounds i8* %t186, i32 %t162
-  store i8 %t255, i8* %t256, align 1
-  %t257 = fadd float %t201, %t220
-  %t258 = fptosi float %t257 to i32
-  %t259 = add nsw i32 %t258, 4
-  %t260 = lshr i32 %t259, 3
-  %t261 = and i32 %t260, 1023
-  %t262 = add i32 %t261, 128
-  %t263 = getelementptr inbounds i8* %t6, i32 %t262
-  %t264 = load i8* %t263, align 1
-  %t265 = getelementptr inbounds i8* %t186, i32 %t163
-  store i8 %t264, i8* %t265, align 1
-  %t266 = fsub float %t201, %t220
-  %t267 = fptosi float %t266 to i32
-  %t268 = add nsw i32 %t267, 4
-  %t269 = lshr i32 %t268, 3
-  %t270 = and i32 %t269, 1023
-  %t271 = add i32 %t270, 128
-  %t272 = getelementptr inbounds i8* %t6, i32 %t271
-  %t273 = load i8* %t272, align 1
-  %t274 = getelementptr inbounds i8* %t186, i32 %t164
-  store i8 %t273, i8* %t274, align 1
-  %t275 = fadd float %t199, %t221
-  %t276 = fptosi float %t275 to i32
-  %t277 = add nsw i32 %t276, 4
-  %t278 = lshr i32 %t277, 3
-  %t279 = and i32 %t278, 1023
-  %t280 = add i32 %t279, 128
-  %t281 = getelementptr inbounds i8* %t6, i32 %t280
-  %t282 = load i8* %t281, align 1
-  %t283 = getelementptr inbounds i8* %t186, i32 %t165
-  store i8 %t282, i8* %t283, align 1
-  %t284 = fsub float %t199, %t221
-  %t285 = fptosi float %t284 to i32
-  %t286 = add nsw i32 %t285, 4
-  %t287 = lshr i32 %t286, 3
-  %t288 = and i32 %t287, 1023
-  %t289 = add i32 %t288, 128
-  %t290 = getelementptr inbounds i8* %t6, i32 %t289
-  %t291 = load i8* %t290, align 1
-  %t292 = getelementptr inbounds i8* %t186, i32 %t166
-  store i8 %t291, i8* %t292, align 1
-  %t293 = add nsw i32 %t168, 1
-  %t294 = icmp eq i32 %t293, 8
-  br i1 %t294, label %bb295, label %bb167
-
-bb295:
-  ret void
-}
-
-%struct.ct_data_s = type { %union.anon, %union.anon }
-%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
-%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
-%struct.static_tree_desc = type { i32 }
-%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
-%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
-%union.anon = type { i16 }
-
-define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
-entry:
-  %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 4                      ; <i32> [#uses=2]
-  %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
-  %3 = load i8** %2, align 4                      ; <i8*> [#uses=27]
-  %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 4                      ; <i32> [#uses=17]
-  %6 = getelementptr inbounds i8* %3, i32 %5      ; <i8*> [#uses=1]
-  %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
-  %8 = load i32* %7, align 4                      ; <i32> [#uses=4]
-  %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
-  %10 = load i32* %9, align 4                     ; <i32> [#uses=2]
-  %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
-  %12 = load i32* %11, align 4                    ; <i32> [#uses=2]
-  %13 = add i32 %12, -262                         ; <i32> [#uses=1]
-  %14 = icmp ugt i32 %5, %13                      ; <i1> [#uses=1]
-  br i1 %14, label %bb, label %bb2
-
-bb:                                               ; preds = %entry
-  %15 = add i32 %5, 262                           ; <i32> [#uses=1]
-  %16 = sub i32 %15, %12                          ; <i32> [#uses=1]
-  br label %bb2
-
-bb2:                                              ; preds = %bb, %entry
-  %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
-  %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
-  %18 = load i16** %17, align 4                   ; <i16*> [#uses=1]
-  %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
-  %20 = load i32* %19, align 4                    ; <i32> [#uses=1]
-  %.sum = add i32 %5, 258                         ; <i32> [#uses=2]
-  %21 = getelementptr inbounds i8* %3, i32 %.sum  ; <i8*> [#uses=1]
-  %22 = add nsw i32 %5, -1                        ; <i32> [#uses=1]
-  %.sum30 = add i32 %22, %8                       ; <i32> [#uses=1]
-  %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
-  %24 = load i8* %23, align 1                     ; <i8> [#uses=1]
-  %.sum31 = add i32 %8, %5                        ; <i32> [#uses=1]
-  %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
-  %26 = load i8* %25, align 1                     ; <i8> [#uses=1]
-  %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
-  %28 = load i32* %27, align 4                    ; <i32> [#uses=1]
-  %29 = lshr i32 %1, 2                            ; <i32> [#uses=1]
-  %30 = icmp ult i32 %8, %28                      ; <i1> [#uses=1]
-  %. = select i1 %30, i32 %1, i32 %29             ; <i32> [#uses=1]
-  %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
-  %32 = load i32* %31, align 4                    ; <i32> [#uses=4]
-  %33 = icmp ugt i32 %10, %32                     ; <i1> [#uses=1]
-  %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
-  %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
-  %35 = ptrtoint i8* %21 to i32                   ; <i32> [#uses=1]
-  %36 = add nsw i32 %5, 257                       ; <i32> [#uses=1]
-  %tmp81 = add i32 %., -1                         ; <i32> [#uses=1]
-  br label %bb6
-
-bb6:                                              ; preds = %bb24, %bb2
-  %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
-  %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
-  %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
-  %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
-  %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
-  %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
-  %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
-  %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
-  %39 = load i8* %38, align 1                     ; <i8> [#uses=1]
-  %40 = icmp eq i8 %39, %scan_end.1               ; <i1> [#uses=1]
-  br i1 %40, label %bb7, label %bb23
-
-bb7:                                              ; preds = %bb6
-  %41 = add nsw i32 %best_len.2, -1               ; <i32> [#uses=1]
-  %.sum33 = add i32 %41, %cur_match_addr.0        ; <i32> [#uses=1]
-  %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
-  %43 = load i8* %42, align 1                     ; <i8> [#uses=1]
-  %44 = icmp eq i8 %43, %scan_end1.1              ; <i1> [#uses=1]
-  br i1 %44, label %bb8, label %bb23
-
-bb8:                                              ; preds = %bb7
-  %45 = load i8* %37, align 1                     ; <i8> [#uses=1]
-  %46 = load i8* %6, align 1                      ; <i8> [#uses=1]
-  %47 = icmp eq i8 %45, %46                       ; <i1> [#uses=1]
-  br i1 %47, label %bb9, label %bb23
-
-bb9:                                              ; preds = %bb8
-  %.sum34 = add i32 %cur_match_addr.0, 1          ; <i32> [#uses=1]
-  %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
-  %49 = load i8* %48, align 1                     ; <i8> [#uses=1]
-  %.sum88 = add i32 %5, 1                         ; <i32> [#uses=1]
-  %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
-  %51 = load i8* %50, align 1                     ; <i8> [#uses=1]
-  %52 = icmp eq i8 %49, %51                       ; <i1> [#uses=1]
-  br i1 %52, label %bb10, label %bb23
-
-bb10:                                             ; preds = %bb9
-  %tmp39 = add i32 %cur_match_addr.0, 10          ; <i32> [#uses=1]
-  %tmp41 = add i32 %cur_match_addr.0, 9           ; <i32> [#uses=1]
-  %tmp44 = add i32 %cur_match_addr.0, 8           ; <i32> [#uses=1]
-  %tmp47 = add i32 %cur_match_addr.0, 7           ; <i32> [#uses=1]
-  %tmp50 = add i32 %cur_match_addr.0, 6           ; <i32> [#uses=1]
-  %tmp53 = add i32 %cur_match_addr.0, 5           ; <i32> [#uses=1]
-  %tmp56 = add i32 %cur_match_addr.0, 4           ; <i32> [#uses=1]
-  %tmp59 = add i32 %cur_match_addr.0, 3           ; <i32> [#uses=1]
-  br label %bb11
-
-bb11:                                             ; preds = %bb18, %bb10
-  %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
-  %tmp = shl i32 %indvar, 3                       ; <i32> [#uses=16]
-  %tmp40 = add i32 %tmp39, %tmp                   ; <i32> [#uses=1]
-  %scevgep = getelementptr i8* %3, i32 %tmp40     ; <i8*> [#uses=1]
-  %tmp42 = add i32 %tmp41, %tmp                   ; <i32> [#uses=1]
-  %scevgep43 = getelementptr i8* %3, i32 %tmp42   ; <i8*> [#uses=1]
-  %tmp45 = add i32 %tmp44, %tmp                   ; <i32> [#uses=1]
-  %scevgep46 = getelementptr i8* %3, i32 %tmp45   ; <i8*> [#uses=1]
-  %tmp48 = add i32 %tmp47, %tmp                   ; <i32> [#uses=1]
-  %scevgep49 = getelementptr i8* %3, i32 %tmp48   ; <i8*> [#uses=1]
-  %tmp51 = add i32 %tmp50, %tmp                   ; <i32> [#uses=1]
-  %scevgep52 = getelementptr i8* %3, i32 %tmp51   ; <i8*> [#uses=1]
-  %tmp54 = add i32 %tmp53, %tmp                   ; <i32> [#uses=1]
-  %scevgep55 = getelementptr i8* %3, i32 %tmp54   ; <i8*> [#uses=1]
-  %tmp60 = add i32 %tmp59, %tmp                   ; <i32> [#uses=1]
-  %scevgep61 = getelementptr i8* %3, i32 %tmp60   ; <i8*> [#uses=1]
-  %tmp62 = add i32 %tmp, 10                       ; <i32> [#uses=1]
-  %.sum89 = add i32 %5, %tmp62                    ; <i32> [#uses=2]
-  %scevgep63 = getelementptr i8* %3, i32 %.sum89  ; <i8*> [#uses=2]
-  %tmp64 = add i32 %tmp, 9                        ; <i32> [#uses=1]
-  %.sum90 = add i32 %5, %tmp64                    ; <i32> [#uses=1]
-  %scevgep65 = getelementptr i8* %3, i32 %.sum90  ; <i8*> [#uses=2]
-  %tmp66 = add i32 %tmp, 8                        ; <i32> [#uses=1]
-  %.sum91 = add i32 %5, %tmp66                    ; <i32> [#uses=1]
-  %scevgep67 = getelementptr i8* %3, i32 %.sum91  ; <i8*> [#uses=2]
-  %tmp6883 = or i32 %tmp, 7                       ; <i32> [#uses=1]
-  %.sum92 = add i32 %5, %tmp6883                  ; <i32> [#uses=1]
-  %scevgep69 = getelementptr i8* %3, i32 %.sum92  ; <i8*> [#uses=2]
-  %tmp7084 = or i32 %tmp, 6                       ; <i32> [#uses=1]
-  %.sum93 = add i32 %5, %tmp7084                  ; <i32> [#uses=1]
-  %scevgep71 = getelementptr i8* %3, i32 %.sum93  ; <i8*> [#uses=2]
-  %tmp7285 = or i32 %tmp, 5                       ; <i32> [#uses=1]
-  %.sum94 = add i32 %5, %tmp7285                  ; <i32> [#uses=1]
-  %scevgep73 = getelementptr i8* %3, i32 %.sum94  ; <i8*> [#uses=2]
-  %tmp7486 = or i32 %tmp, 4                       ; <i32> [#uses=1]
-  %.sum95 = add i32 %5, %tmp7486                  ; <i32> [#uses=1]
-  %scevgep75 = getelementptr i8* %3, i32 %.sum95  ; <i8*> [#uses=2]
-  %tmp7687 = or i32 %tmp, 3                       ; <i32> [#uses=1]
-  %.sum96 = add i32 %5, %tmp7687                  ; <i32> [#uses=1]
-  %scevgep77 = getelementptr i8* %3, i32 %.sum96  ; <i8*> [#uses=2]
-  %53 = load i8* %scevgep77, align 1              ; <i8> [#uses=1]
-  %54 = load i8* %scevgep61, align 1              ; <i8> [#uses=1]
-  %55 = icmp eq i8 %53, %54                       ; <i1> [#uses=1]
-  br i1 %55, label %bb12, label %bb20
-
-bb12:                                             ; preds = %bb11
-  %tmp57 = add i32 %tmp56, %tmp                   ; <i32> [#uses=1]
-  %scevgep58 = getelementptr i8* %3, i32 %tmp57   ; <i8*> [#uses=1]
-  %56 = load i8* %scevgep75, align 1              ; <i8> [#uses=1]
-  %57 = load i8* %scevgep58, align 1              ; <i8> [#uses=1]
-  %58 = icmp eq i8 %56, %57                       ; <i1> [#uses=1]
-  br i1 %58, label %bb13, label %bb20
-
-bb13:                                             ; preds = %bb12
-  %59 = load i8* %scevgep73, align 1              ; <i8> [#uses=1]
-  %60 = load i8* %scevgep55, align 1              ; <i8> [#uses=1]
-  %61 = icmp eq i8 %59, %60                       ; <i1> [#uses=1]
-  br i1 %61, label %bb14, label %bb20
-
-bb14:                                             ; preds = %bb13
-  %62 = load i8* %scevgep71, align 1              ; <i8> [#uses=1]
-  %63 = load i8* %scevgep52, align 1              ; <i8> [#uses=1]
-  %64 = icmp eq i8 %62, %63                       ; <i1> [#uses=1]
-  br i1 %64, label %bb15, label %bb20
-
-bb15:                                             ; preds = %bb14
-  %65 = load i8* %scevgep69, align 1              ; <i8> [#uses=1]
-  %66 = load i8* %scevgep49, align 1              ; <i8> [#uses=1]
-  %67 = icmp eq i8 %65, %66                       ; <i1> [#uses=1]
-  br i1 %67, label %bb16, label %bb20
-
-bb16:                                             ; preds = %bb15
-  %68 = load i8* %scevgep67, align 1              ; <i8> [#uses=1]
-  %69 = load i8* %scevgep46, align 1              ; <i8> [#uses=1]
-  %70 = icmp eq i8 %68, %69                       ; <i1> [#uses=1]
-  br i1 %70, label %bb17, label %bb20
-
-bb17:                                             ; preds = %bb16
-  %71 = load i8* %scevgep65, align 1              ; <i8> [#uses=1]
-  %72 = load i8* %scevgep43, align 1              ; <i8> [#uses=1]
-  %73 = icmp eq i8 %71, %72                       ; <i1> [#uses=1]
-  br i1 %73, label %bb18, label %bb20
-
-bb18:                                             ; preds = %bb17
-  %74 = load i8* %scevgep63, align 1              ; <i8> [#uses=1]
-  %75 = load i8* %scevgep, align 1                ; <i8> [#uses=1]
-  %76 = icmp eq i8 %74, %75                       ; <i1> [#uses=1]
-  %77 = icmp slt i32 %.sum89, %.sum               ; <i1> [#uses=1]
-  %or.cond = and i1 %76, %77                      ; <i1> [#uses=1]
-  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
-  br i1 %or.cond, label %bb11, label %bb20
-
-bb20:                                             ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
-  %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
-  %78 = ptrtoint i8* %scan.3 to i32               ; <i32> [#uses=1]
-  %79 = sub nsw i32 %78, %35                      ; <i32> [#uses=2]
-  %80 = add i32 %79, 258                          ; <i32> [#uses=5]
-  %81 = icmp sgt i32 %80, %best_len.2             ; <i1> [#uses=1]
-  br i1 %81, label %bb21, label %bb23
-
-bb21:                                             ; preds = %bb20
-  store i32 %cur_match_addr.0, i32* %34, align 4
-  %82 = icmp slt i32 %80, %nice_match.0.ph        ; <i1> [#uses=1]
-  br i1 %82, label %bb22, label %bb25
-
-bb22:                                             ; preds = %bb21
-  %.sum37 = add i32 %36, %79                      ; <i32> [#uses=1]
-  %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
-  %84 = load i8* %83, align 1                     ; <i8> [#uses=1]
-  %.sum38 = add i32 %80, %5                       ; <i32> [#uses=1]
-  %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
-  %86 = load i8* %85, align 1                     ; <i8> [#uses=1]
-  br label %bb23
-
-bb23:                                             ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
-  %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
-  %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
-  %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
-  %87 = and i32 %cur_match_addr.0, %20            ; <i32> [#uses=1]
-  %88 = getelementptr inbounds i16* %18, i32 %87  ; <i16*> [#uses=1]
-  %89 = load i16* %88, align 2                    ; <i16> [#uses=1]
-  %90 = zext i16 %89 to i32                       ; <i32> [#uses=2]
-  %91 = icmp ugt i32 %90, %iftmp.48.0             ; <i1> [#uses=1]
-  br i1 %91, label %bb24, label %bb25
-
-bb24:                                             ; preds = %bb23
-
-; LSR should use count-down iteration to avoid requiring the trip count
-; in a register.
-
-;      CHECK: @ %bb24
-; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
-; CHECK: bne.w
-
-  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
-  %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
-  br i1 %92, label %bb25, label %bb6
-
-bb25:                                             ; preds = %bb24, %bb23, %bb21
-  %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
-  %93 = icmp ugt i32 %best_len.1, %32             ; <i1> [#uses=1]
-  %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
-  ret i32 %merge
-}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a96..5b4cf9d 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.partition_entry = type { i32, i32, i64, i64 }
 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f..3ac7d77 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 ;rdar://8003725
 
 @G1 = external global i32
@@ -6,13 +6,42 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
+; CHECK: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
     %tmp1 = icmp eq i32 %cond1, 0
     %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
     %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
     %tmp4 = add i32 %tmp2, %tmp3
     ret i32 %tmp4
 }
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+  %0 = load i32* @foo, align 4
+  %cmp28 = icmp sgt i32 %0, 0
+  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = icmp sgt i32 %0, 1
+  %smax = select i1 %1, i32 %0, i32 1
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  unreachable
+
+for.cond1.preheader:                              ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 30b9f59..dc77282 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s
 
-; The ARM magic hinting works best with linear scan.
 ; CHECK: ldrd
 ; CHECK: strd
 ; CHECK: ldrb
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda022..fe0056c 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
 
 @from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
+        ; DARWIN: movs r1, #0
+        ; DARWIN: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
         call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 3cb8a8e..c50a233 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -41,3 +41,45 @@ entry:
         ret i32 %0
 }
 
+define i32 @tn9(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn9:
+; CHECK: add	r0, r0, r0, lsl #3
+; CHECK: rsb	r0, r0, #0
+        %0 = mul i32 %v, -9
+        ret i32 %0
+}
+
+define i32 @tn7(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn7:
+; CHECK: sub r0, r0, r0, lsl #3
+	%0 = mul i32 %v, -7
+	ret i32 %0
+}
+
+define i32 @tn5(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn5:
+; CHECK: add r0, r0, r0, lsl #2
+; CHECK: rsb r0, r0, #0
+        %0 = mul i32 %v, -5
+        ret i32 %0
+}
+
+define i32 @tn3(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn3:
+; CHECK: sub r0, r0, r0, lsl #2
+        %0 = mul i32 %v, -3
+        ret i32 %0
+}
+
+define i32 @tn12288(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn12288:
+; CHECK: sub r0, r0, r0, lsl #2
+; CHECK: lsl{{.*}}#12
+        %0 = mul i32 %v, -12288
+        ret i32 %0
+}
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index c78872a..b892d2d 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+; CHECK: t1
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vadd.i16 d
+; CHECK: vstr d
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
 	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
@@ -12,6 +15,11 @@ entry:
 	ret void
 }
 
+; CHECK: t2
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vsub.i16 d
+; CHECK: vmov r0, r1, d
 define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
 entry:
 	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 130277b..944bfe0 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov  | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+; CHECK: t1
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vadd.i64 q
+; CHECK: vstmia
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
 	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
@@ -12,6 +15,12 @@ entry:
 	ret void
 }
 
+; CHECK: t2
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vsub.i64 q
+; CHECK: vmov r0, r1, d
+; CHECK: vmov r2, r3, d
 define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
 entry:
 	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 0000000..277bd05
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -verify-machineinstrs
+; RUN: llc < %s -verify-machineinstrs -O0
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  %3 = load %0** %2, align 4, !tbaa !0
+  store float 0.000000e+00, float* undef, align 4
+  %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+  call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
+  %5 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  store float 1.000000e+00, float* undef, align 4
+  call arm_aapcs_vfpcc  void @func1(%0* undef, float* undef, float* undef, %2* undef)
+  store float 1.500000e+01, float* undef, align 4
+  %6 = call arm_aapcs_vfpcc  %2** @func4() nounwind
+  %7 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+  %8 = fadd float undef, -1.000000e+05
+  store float %8, float* undef, align 16, !tbaa !3
+  %9 = call arm_aapcs_vfpcc  i32 @rand() nounwind
+  %10 = fmul float undef, 2.000000e+05
+  %11 = fadd float %10, -1.000000e+05
+  store float %11, float* undef, align 4, !tbaa !3
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 0000000..e28b578
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section        .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section        .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
new file mode 100644
index 0000000..b4da552
--- /dev/null
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+
+; CHECK: func_4_8
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
+  %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
+  store <4 x i8> %r, <4 x i8>* %p
+  ret void
+}
+
+; CHECK: func_2_16
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
+  %r = add <2 x i16> %param, <i16 1, i16 2>
+  store <2 x i16> %r, <2 x i16>* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index e670a5b..e72d51f 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; XFAIL: *
+; PR11364
 
 ; vmov s0, r0 + vmov r0, s0 should have been optimized away.
 ; rdar://9104514
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 0000000..d1d0ee5
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very smple case.
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+;CHECK: ldm
+;CHECK: stm
+  %0 = bitcast %struct.Foo* %a to i8*
+  %1 = bitcast %struct.Foo* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3a19211..05794e4 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -155,7 +155,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 ; CHECK:        t6:
-; CHECK:        vldr.64
+; CHECK:        vldr
 ; CHECK:        vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
 ; CHECK-NEXT:   vld2.8 {d[[D1]][1], d[[D0]][1]}
   %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
@@ -240,7 +240,7 @@ bb14:                                             ; preds = %bb6
 ; PR7157
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
-; CHECK:        vldr.64
+; CHECK:        vldr
 ; CHECK-NOT:    vmov d{{.*}}, d16
 ; CHECK:        vmov.i32 d17
 ; CHECK-NEXT:   vstmia r0, {d16, d17}
@@ -272,8 +272,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
-; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
 ; CHECK: vadd.f32 q8, q8, q8
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28..6bb6743 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
   ret i32 %conv3
 }
 
+; rdar://10750814
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 entry:
 ; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
   %conv = zext i16 %v to i32
   %shr4 = lshr i32 %conv, 8
   %shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index f43dde5..c9ac66a 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,14 +64,14 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
 entry:
 ; ARM: t4:
 ; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
 
 ; ARMT2: t4:
 ; ARMT2: movwlt [[R0:r[0-9]+]], #65365
 ; ARMT2: movtlt [[R0]], #65365
 
 ; THUMB2: t4:
-; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290
+; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290
   %0 = icmp slt i32 %a, %b
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index f1bd7ee..3e07da8 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,12 @@ define double @f7(double %a, double %b) {
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
 ; CHECK-NEON:      _f8:
-; CHECK-NEON:      adr     r2, LCPI7_0
-; CHECK-NEON-NEXT: movw    r3, #1123
-; CHECK-NEON-NEXT: adds    r1, r2, #4
-; CHECK-NEON-NEXT: cmp     r0, r3
+; CHECK-NEON:      adr     [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON-NEXT: movw    [[R3:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: adds    {{r.*}}, [[R2]], #4
+; CHECK-NEON-NEXT: cmp     r0, [[R3]]
 ; CHECK-NEON-NEXT: it      ne
-; CHECK-NEON-NEXT: movne   r1, r2
+; CHECK-NEON-NEXT: movne   {{r.*}}, [[R2]]
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON:      bx
 
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133a..ca2e18a 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
   %s = or i32 %z, %y
  ret i32 %s
 }
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq r2, r2, #1
+  %tmp1 = icmp eq i32 %a, %b
+  %tmp2 = zext i1 %tmp1 to i32
+  %tmp3 = or i32 %tmp2, %c
+  ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %c, i32 0
+  %tmp2 = xor i32 %tmp1, %d
+  ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+  %tmp1 = shl i32 %c, 1
+  %cond = icmp eq i32 %a, %b
+  %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+  %tmp3 = and i32 %c, %tmp2
+  ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef0..eb971ff 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -55,11 +55,15 @@ define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
 ; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A8: str [[REG]], [r0, r1, lsl #2]
+; A8-NOT: str [[REG]], [r0]
 
 ; A9: test4:
 ; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A9: str [[REG]], [r0, r1, lsl #2]
+; A9-NOT: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55c..057ea11 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index f4e3a44..983ba45 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
 ; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
 
 ; The greedy register allocator uses a single CSR here, invalidating the test.
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 993d7ec..03ae12c 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -12,13 +12,13 @@ target triple = "thumbv7-apple-ios"
 ;
 ; CHECK: f1
 ; CHECK: vmov    s1, r0
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
 ; The vector must be spilled:
-; CHECK: vstr.64 d0,
+; CHECK: vstr d0,
 ; CHECK: asm clobber d0
 ; And reloaded after the asm:
-; CHECK: vldr.64 [[D16:d[0-9]+]],
-; CHECK: vstr.64 [[D16]], [r1]
+; CHECK: vldr [[D16:d[0-9]+]],
+; CHECK: vstr [[D16]], [r1]
 define void @f1(float %x, <2 x float>* %p) {
   %v1 = insertelement <2 x float> undef, float %x, i32 1
   %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
@@ -37,13 +37,13 @@ define void @f1(float %x, <2 x float>* %p) {
 ; virtual register.  It doesn't read the old value.
 ;
 ; CHECK: f2
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
 ; The vector must not be spilled:
-; CHECK-NOT: vstr.64
+; CHECK-NOT: vstr
 ; CHECK: asm clobber d0
 ; But instead rematerialize after the asm:
-; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI
-; CHECK: vstr.64 [[D0:d[0-9]+]], [r0]
+; CHECK: vldr [[S0:s[0-9]+]], LCPI
+; CHECK: vstr [[D0:d[0-9]+]], [r0]
 define void @f2(<2 x float>* %p) {
   %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0
   %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 0000000..e015bf0
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+  br label %indirectgoto
+
+INCREMENT:                                        ; preds = %indirectgoto
+  %inc = add nsw i32 %result.0, 1
+  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+  br label %indirectgoto
+
+DECREMENT:                                        ; preds = %indirectgoto
+  %dec = add nsw i32 %result.0, -1
+  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+  br label %indirectgoto
+
+indirectgoto:                                     ; preds = %DECREMENT, %INCREMENT, %entry
+  %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+  %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+  %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+  %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+  %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+  indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN:                                           ; preds = %indirectgoto
+  ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 0000000..93340c3
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
index 14e668e..f157dbd 100644
--- a/test/CodeGen/ARM/vbsl-constant.ll
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -2,8 +2,8 @@
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK: v_bsli8:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -16,8 +16,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK: v_bsli16:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -30,8 +30,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK: v_bsli32:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
 ;CHECK: v_bsli64:
-;CHECK: vldr.64
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 1387393..7fddbed 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>)
 
 ; Test signed conversion.
 ; CHECK: t1
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>)
 
 ; Test unsigned conversion.
 ; CHECK: t2
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
   %tmp = load i32* @uin, align 4, !tbaa !3
@@ -38,7 +38,7 @@ entry:
 
 ; Test which should not fold due to non-power of 2.
 ; CHECK: t3
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -52,7 +52,7 @@ entry:
 
 ; Test which should not fold due to power of 2 out of range.
 ; CHECK: t4
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -66,7 +66,7 @@ entry:
 
 ; Test case where const is max power of 2 (i.e., 2^32).
 ; CHECK: t5
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -80,7 +80,7 @@ entry:
 
 ; Test quadword.
 ; CHECK: t6
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index e99fac1..05332e4 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -254,7 +254,7 @@ entry:
 ;CHECK: redundantVdup:
 ;CHECK: vmov.i8
 ;CHECK-NOT: vdup.8
-;CHECK: vstr.64
+;CHECK: vstr
 define void @redundantVdup(<8 x i8>* %ptr) nounwind {
   %1 = insertelement <8 x i8> undef, i8 -128, i32 0
   %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 81bdc44..a38a0fe 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -80,7 +80,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
 ; so they are not split up into i32 values.  Radar 8755338.
 define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_buildvector
-; CHECK: vldr.64
+; CHECK: vldr
   %t0 = load i64* %ptr, align 4
   %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
   store <2 x i64> %t1, <2 x i64>* %vp
@@ -89,7 +89,7 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
 
 define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_insertelement
-; CHECK: vldr.64
+; CHECK: vldr
   %t0 = load i64* %ptr, align 4
   %vec = load <2 x i64>* %vp
   %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
@@ -99,7 +99,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 
 define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_extractelement
-; CHECK: vstr.64
+; CHECK: vstr
   %vec = load <2 x i64>* %vp
   %t1 = extractelement <2 x i64> %vec, i32 0
   store i64 %t1, i64* %ptr
@@ -123,3 +123,13 @@ define void @orVec(<3 x i8>* %A) nounwind {
   ret void
 }
 
+; The following test was hitting an assertion in the DAG combiner when
+; constant folding the multiply because the "sext undef" was translated to
+; a BUILD_VECTOR with i32 0 operands, which did not match the i16 operands
+; of the other BUILD_VECTOR.
+define i16 @foldBuildVectors() {
+  %1 = sext <8 x i8> undef to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
new file mode 100644
index 0000000..5e9239f
--- /dev/null
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: g:
+define float @g(<4 x i8>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u8
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i8>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i8> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: h:
+define <4 x i8> @h(<4 x float> %v) {
+  ; CHECK: vcvt.{{[us]}}32.f32
+  ; CHECK: vmovn.i32
+  %1 = fptoui <4 x float> %v to <4 x i8>
+  ret <4 x i8> %1
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 65b5913..e224bdf 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -138,7 +138,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ; Make sure this doesn't crash
 define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
 ; CHECK: test_elem_mismatch:
-; CHECK: vstr.64
+; CHECK: vstr
   %tmp0 = load <2 x i64>* %src, align 16
   %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index d0e9ac3..61d73c1 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
 
 define <2 x float> @vld1dupf(float* %A) nounwind {
 ;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0]
+;CHECK: vld1.32 {d16[]}, [r0, :32]
 	%tmp0 = load float* %A
         %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
 
 define <4 x float> @vld1dupQf(float* %A) nounwind {
 ;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0]
+;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
         %tmp0 = load float* %A
         %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 0d7d4ec..7bd0cbd 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -31,9 +31,19 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
         ret <2 x i32> %tmp3
 }
 
+define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32a32:
+;Check the alignment value.  Legal values are none or :32.
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = load i32* %A, align 4
+	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+        ret <2 x i32> %tmp3
+}
+
 define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0]
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
 	%tmp1 = load <2 x float>* %B
 	%tmp2 = load float* %A, align 4
 	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
@@ -69,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 
 define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0]
+;CHECK: vld1.32 {d16[0]}, [r0, :32]
 	%tmp1 = load <4 x float>* %B
 	%tmp2 = load float* %A
 	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index a86be32b..0c23879 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -56,13 +56,13 @@ define <2 x i32> @v_movi32d() nounwind {
 
 define <2 x i32> @v_movi32e() nounwind {
 ;CHECK: v_movi32e:
-;CHECK: vmov.i32 d{{.*}}, #0x20FF
+;CHECK: vmov.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 define <2 x i32> @v_movi32f() nounwind {
 ;CHECK: v_movi32f:
-;CHECK: vmov.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
@@ -92,19 +92,19 @@ define <2 x i32> @v_mvni32d() nounwind {
 
 define <2 x i32> @v_mvni32e() nounwind {
 ;CHECK: v_mvni32e:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 4294958848, i32 4294958848 >
 }
 
 define <2 x i32> @v_mvni32f() nounwind {
 ;CHECK: v_mvni32f:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 4292804608, i32 4292804608 >
 }
 
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
-;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
 	ret <1 x i64> < i64 18374687574888349695 >
 }
 
@@ -152,19 +152,19 @@ define <4 x i32> @v_movQi32d() nounwind {
 
 define <4 x i32> @v_movQi32e() nounwind {
 ;CHECK: v_movQi32e:
-;CHECK: vmov.i32 q{{.*}}, #0x20FF
+;CHECK: vmov.i32 q{{.*}}, #0x20ff
 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 define <4 x i32> @v_movQi32f() nounwind {
 ;CHECK: v_movQi32f:
-;CHECK: vmov.i32 q{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 q{{.*}}, #0x20ffff
 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 define <2 x i64> @v_movQi64() nounwind {
 ;CHECK: v_movQi64:
-;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
@@ -182,7 +182,7 @@ entry:
 define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
 ;CHECK: vdupnneg75:
-;CHECK: vmov.i8 d{{.*}}, #0xB5
+;CHECK: vmov.i8 d{{.*}}, #0xb5
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
   ret void
@@ -353,3 +353,48 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
   store <4 x i16> %tmp2, <4 x i16>* %b, align 8
   ret void
 }
+
+; Use vmov.f32 to materialize f32 immediate splats
+; rdar://10437054
+define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v2f32:
+;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
+  store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
+  ret void
+}
+
+define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32:
+;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
+  store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
+  ret void
+}
+
+define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32_undef:
+;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
+  %a = load <4 x float> *%p
+  %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
+  store <4 x float> %b, <4 x float> *%p
+  ret void
+}
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1780d6e..61d89bb 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -514,3 +514,14 @@ entry:
   store <8 x i8> %10, <8 x i8>* %11, align 8
   ret void
 }
+
+; If one operand has a zero-extend and the other a sign-extend, vmull
+; cannot be used.
+define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
+; CHECK: vmullWithInconsistentExtensions
+; CHECK-NOT: vmull.s8
+  %1 = sext <8 x i8> %vec to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 34acd16..122ec03 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -148,11 +148,11 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
   ret void
 }
 
-; vrev <4 x i16> should use VREV32 and not VREV64
+; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored
+; to <2 x i16> when stored to memory.
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
 ; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
+; CHECK: vst1.32
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
   %tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+	%tmp1 = load <4 x i16>* %B
+	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+	%t5 = getelementptr inbounds i8* %out, i32 16
+	ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+  %tmp1 = load <4 x float>* %this
+  call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+  %tmp2 = getelementptr inbounds i8* %out, i32  32
+  ret i8* %tmp2
+}
+
 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 08b7232..758b355 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 
 define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
 	%tmp1 = load <2 x float>* %B
         %tmp2 = extractelement <2 x float> %tmp1, i32 1
         store float %tmp2, float* %A
@@ -358,6 +358,13 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+; Make sure this doesn't crash; PR10258
+define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
+;CHECK: variable_insertelement:
+    %r = insertelement <8 x i16> %a, i16 %b, i32 %c
+    ret <8 x i16> %r
+}
+
 declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
 declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
 declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 8fd99ba..2cffda3 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
 target triple = "thumbv7-apple-ios"
 
-; The 0.0 constant is loaded from the constant pool and kept in a register.
+; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
 ; CHECK: %entry
-; CHECK: vldr.32 s
+; CHECK: vldr s
 ; The float loop variable is initialized with a vmovs from the constant register.
 ; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32.
 ; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]]
@@ -24,8 +24,8 @@ for.body4:
   br label %for.body.i
 
 for.body.i:
-  %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
-  %add.i = fadd float %tmp3.i, 0.000000e+00
+  %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
+  %add.i = fadd float %tmp3.i, 1.000000e+10
   %exitcond.i = icmp eq i32 undef, 41
   br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
 
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
deleted file mode 100644
index 4b3d022..0000000
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha
-
-@.str_4 = external global [44 x i8]             ; <[44 x i8]*> [#uses=0]
-
-declare void @printf(i32, ...)
-
-define void @main() {
-entry:
-        %tmp.11861 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.19466 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.21571 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.36796 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        br i1 %tmp.11861, label %loopexit.2, label %no_exit.2
-
-no_exit.2:              ; preds = %entry
-        ret void
-
-loopexit.2:             ; preds = %entry
-        br i1 %tmp.19466, label %loopexit.3, label %no_exit.3.preheader
-
-no_exit.3.preheader:            ; preds = %loopexit.2
-        ret void
-
-loopexit.3:             ; preds = %loopexit.2
-        br i1 %tmp.21571, label %no_exit.6, label %no_exit.4
-
-no_exit.4:              ; preds = %loopexit.3
-        ret void
-
-no_exit.6:              ; preds = %no_exit.6, %loopexit.3
-        %tmp.30793 = icmp sgt i64 0, 0          ; <i1> [#uses=1]
-        br i1 %tmp.30793, label %loopexit.6, label %no_exit.6
-
-loopexit.6:             ; preds = %no_exit.6
-        %Z.1 = select i1 %tmp.36796, double 1.000000e+00, double 0x3FEFFF7CEDE74EAE; <double> [#uses=2]
-        tail call void (i32, ...)* @printf( i32 0, i64 0, i64 0, i64 0, double 1.000000e+00, double 1.000000e+00, double %Z.1, double %Z.1 )
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
deleted file mode 100644
index 65d2a8d..0000000
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; The global symbol should be legalized
-; RUN: llc < %s -march=alpha 
-
-target datalayout = "e-p:64:64"
-        %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
-        %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] }
-        %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@clause_SORT = external global [21 x %struct.LIST_HELP*]                ; <[21 x %struct.LIST_HELP*]*> [#uses=0]
-@ia_in = external global %struct._IO_FILE*              ; <%struct._IO_FILE**> [#uses=1]
-@multvec_j = external global [100 x i32]                ; <[100 x i32]*> [#uses=0]
-
-define void @main(i32 %argc) {
-clock_Init.exit:
-        %tmp.5.i575 = load i32* null            ; <i32> [#uses=1]
-        %tmp.309 = icmp eq i32 %tmp.5.i575, 0           ; <i1> [#uses=1]
-        br i1 %tmp.309, label %UnifiedReturnBlock, label %then.17
-
-then.17:                ; preds = %clock_Init.exit
-        store %struct._IO_FILE* null, %struct._IO_FILE** @ia_in
-        %savedstack = call i8* @llvm.stacksave( )               ; <i8*> [#uses=0]
-        ret void
-
-UnifiedReturnBlock:             ; preds = %clock_Init.exit
-        ret void
-}
-
-declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
deleted file mode 100644
index 45587f0..0000000
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha 
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev6-unknown-linux-gnu"
-deplibs = [ "c", "crtend", "stdc++" ]
-        %struct.__va_list_tag = type { i8*, i32 }
-
-define i32 @emit_library_call_value(i32 %nargs, ...) {
-entry:
-        %tmp.223 = va_arg %struct.__va_list_tag* null, i32              ; <i32> [#uses=1]
-        ret i32 %tmp.223
-}
-
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
deleted file mode 100644
index 671d39e..0000000
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-        %struct._Callback_list = type { %struct._Callback_list*, void (i32, %struct.ios_base*, i32)*, i32, i32 }
-        %struct._Impl = type { i32, %struct.facet**, i64, %struct.facet**, i8** }
-        %struct._Words = type { i8*, i64 }
-        %"struct.__codecvt_abstract_base<char,char,__mbstate_t>" = type { %struct.facet }
-        %"struct.basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %struct.locale }
-        %struct.facet = type { i32 (...)**, i32 }
-        %struct.ios_base = type { i32 (...)**, i64, i64, i32, i32, i32, %struct._Callback_list*, %struct._Words, [8 x %struct._Words], i32, %struct._Words*, %struct.locale }
-        %struct.locale = type { %struct._Impl* }
-        %"struct.ostreambuf_iterator<char,std::char_traits<char> >" = type { %"struct.basic_streambuf<char,std::char_traits<char> >"*, i1 }
-
-define void @_ZNKSt7num_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE15_M_insert_floatIdEES3_S3_RSt8ios_baseccT_() {
-entry:
-        %tmp234 = icmp eq i8 0, 0               ; <i1> [#uses=1]
-        br i1 %tmp234, label %cond_next243, label %cond_true235
-
-cond_true235:           ; preds = %entry
-        ret void
-
-cond_next243:           ; preds = %entry
-        %tmp428 = load i64* null                ; <i64> [#uses=1]
-        %tmp428.upgrd.1 = trunc i64 %tmp428 to i32              ; <i32> [#uses=1]
-        %tmp429 = alloca i8, i32 %tmp428.upgrd.1                ; <i8*> [#uses=0]
-        unreachable
-}
-
-
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
deleted file mode 100644
index 5d31bc3..0000000
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define i32 @_ZN9__gnu_cxx18__exchange_and_addEPVii(i32* %__mem, i32 %__val) {
-entry:
-        %__tmp = alloca i32, align 4            ; <i32*> [#uses=1]
-        %tmp3 = call i32 asm sideeffect "\0A$$Lxadd_0:\0A\09ldl_l  $0,$3\0A\09addl   $0,$4,$1\0A\09stl_c  $1,$2\0A\09beq    $1,$$Lxadd_0\0A\09mb", "=&r,=*&r,=*m,m,r"( i32* %__tmp, i32* %__mem, i32* %__mem, i32 %__val )            ; <i32> [#uses=1]
-        ret i32 %tmp3
-}
-
-define void @_ZN9__gnu_cxx12__atomic_addEPVii(i32* %__mem, i32 %__val) {
-entry:
-        %tmp2 = call i32 asm sideeffect "\0A$$Ladd_1:\0A\09ldl_l  $0,$2\0A\09addl   $0,$3,$0\0A\09stl_c  $0,$1\0A\09beq    $0,$$Ladd_1\0A\09mb", "=&r,=*m,m,r"( i32* %__mem, i32* %__mem, i32 %__val )                ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
deleted file mode 100644
index 14e0bcc..0000000
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-        %struct.va_list = type { i8*, i32, i32 }
-
-define void @yyerror(i32, ...) {
-entry:
-        %va.upgrd.1 = bitcast %struct.va_list* null to i8*              ; <i8*> [#uses=1]
-        call void @llvm.va_start( i8* %va.upgrd.1 )
-        ret void
-}
-
-declare void @llvm.va_start(i8*)
-
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
deleted file mode 100644
index b537e25..0000000
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-;FIXME: this should produce no mul inst.  But not crashing will have to do for now
-
-; ModuleID = 'Output/bugpoint-train/bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define fastcc i32 @getcount(i32 %s) {
-cond_next43:		; preds = %bb27
-	%tmp431 = mul i32 %s, -3
-	ret i32 %tmp431
-}
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
deleted file mode 100644
index 1a4b40e..0000000
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i64 @__mulvdi3(i64 %a, i64 %b) nounwind {
-entry:
-	%0 = sext i64 %a to i128		; <i128> [#uses=1]
-	%1 = sext i64 %b to i128		; <i128> [#uses=1]
-	%2 = mul i128 %1, %0		; <i128> [#uses=2]
-	%3 = lshr i128 %2, 64		; <i128> [#uses=1]
-	%4 = trunc i128 %3 to i64		; <i64> [#uses=1]
-	%5 = trunc i128 %2 to i64		; <i64> [#uses=1]
-	%6 = icmp eq i64 %4, 0		; <i1> [#uses=1]
-	br i1 %6, label %bb1, label %bb
-
-bb:		; preds = %entry
-	unreachable
-
-bb1:		; preds = %entry
-	ret i64 %5
-}
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
deleted file mode 100644
index 8b9b603..0000000
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-; PR3044
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i128 @__mulvti3(i128 %u, i128 %v) nounwind {
-entry:
-	%0 = load i128* null, align 16		; <i128> [#uses=1]
-	%1 = load i64* null, align 8		; <i64> [#uses=1]
-	%2 = zext i64 %1 to i128		; <i128> [#uses=1]
-	%3 = add i128 %2, %0		; <i128> [#uses=1]
-	store i128 %3, i128* null, align 16
-	unreachable
-}
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
deleted file mode 100644
index cfbf7fc..0000000
--- a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i1 @a(float %x) {
-  %r = fcmp ult float %x, 1.0
-  ret i1 %r
-}
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 4590f12..0000000
--- a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
-  ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
deleted file mode 100644
index b838ec9..0000000
--- a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=alpha | FileCheck %s
-
-define fastcc i64 @getcount(i64 %s) {
-	%tmp431 = mul i64 %s, 12884901888
-	ret i64 %tmp431
-}
-
-; CHECK: sll $16,33,$0
-; CHECK-NEXT: sll $16,32,$1
-; CHECK-NEXT: addq $0,$1,$0
-
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
deleted file mode 100644
index 8a92695..0000000
--- a/test/CodeGen/Alpha/add.ll
+++ /dev/null
@@ -1,178 +0,0 @@
-;test all the shifted and signextending adds and subs with and without consts
-;
-; RUN: llc < %s -march=alpha -o %t.s
-; RUN: grep {	addl} %t.s | count 2
-; RUN: grep {	addq} %t.s | count 2
-; RUN: grep {	subl} %t.s | count 2
-; RUN: grep {	subq} %t.s | count 2
-;
-; RUN: grep {s4addl} %t.s | count 2
-; RUN: grep {s8addl} %t.s | count 2
-; RUN: grep {s4addq} %t.s | count 2
-; RUN: grep {s8addq} %t.s | count 2
-;
-; RUN: grep {s4subl} %t.s | count 2
-; RUN: grep {s8subl} %t.s | count 2
-; RUN: grep {s4subq} %t.s | count 2
-; RUN: grep {s8subq} %t.s | count 2
-
-
-define signext i32 @al(i32 signext %x.s, i32 signext %y.s) {
-entry:
-	%tmp.3.s = add i32 %y.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @ali(i32 signext %x.s)  {
-entry:
-	%tmp.3.s = add i32 100, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i64 @aq(i64 signext %x.s, i64 signext %y.s)  {
-entry:
-	%tmp.3.s = add i64 %y.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @aqi(i64 %x.s) {
-entry:
-	%tmp.3.s = add i64 100, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @sl(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.3.s = sub i32 %y.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @sli(i32 signext %x.s)  {
-entry:
-	%tmp.3.s = sub i32 %x.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @sq(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.3.s = sub i64 %y.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @sqi(i64 %x.s) {
-entry:
-	%tmp.3.s = sub i64 %x.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @a4l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = add i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @a8l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = add i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @a4q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = add i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @a8q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = add i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @a4li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = add i32 100, %tmp.1.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @a8li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = add i32 100, %tmp.1.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @a4qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = add i64 100, %tmp.1.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @a8qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = add i64 100, %tmp.1.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @s4l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @s8l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @s4q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @s8q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @s4li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @s8li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @s4qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @s8qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
deleted file mode 100644
index fa3b949..0000000
--- a/test/CodeGen/Alpha/add128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for ADDC and ADDE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @add128(i128 %x, i128 %y) {
-entry:
-	%tmp = add i128 %y, %x
-	ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
deleted file mode 100644
index 9f00350..0000000
--- a/test/CodeGen/Alpha/bic.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep {bic}
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = and i64 %y, %tmp.1             ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
deleted file mode 100644
index 14f6b46..0000000
--- a/test/CodeGen/Alpha/bsr.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; Make sure this testcase codegens the bsr instruction
-; RUN: llc < %s -march=alpha | grep bsr
-
-define internal i64 @abc(i32 %x) {
-        %tmp.2 = add i32 %x, -1         ; <i32> [#uses=1]
-        %tmp.0 = call i64 @abc( i32 %tmp.2 )            ; <i64> [#uses=1]
-        %tmp.5 = add i32 %x, -2         ; <i32> [#uses=1]
-        %tmp.3 = call i64 @abc( i32 %tmp.5 )            ; <i64> [#uses=1]
-        %tmp.6 = add i64 %tmp.0, %tmp.3         ; <i64> [#uses=1]
-        ret i64 %tmp.6
-}
-
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
deleted file mode 100644
index 24e97a9..0000000
--- a/test/CodeGen/Alpha/call_adj.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-;All this should do is not crash
-;RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define void @_ZNSt13basic_filebufIcSt11char_traitsIcEE22_M_convert_to_externalEPcl(i32 %f) {
-entry:
-        %tmp49 = alloca i8, i32 %f              ; <i8*> [#uses=0]
-        %tmp = call i32 null( i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* null )               ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
deleted file mode 100644
index 9b655f0..0000000
--- a/test/CodeGen/Alpha/cmov.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=alpha | not grep cmovlt
-; RUN: llc < %s -march=alpha | grep cmoveq
-
-define i64 @cmov_lt(i64 %a, i64 %c) {
-entry:
-        %tmp.1 = icmp slt i64 %c, 0             ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 %a, i64 10              ; <i64> [#uses=1]
-        ret i64 %retval
-}
-
-define i64 @cmov_const(i64 %a, i64 %b, i64 %c) {
-entry:
-        %tmp.1 = icmp slt i64 %a, %b            ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 %c, i64 10              ; <i64> [#uses=1]
-        ret i64 %retval
-}
-
-define i64 @cmov_lt2(i64 %a, i64 %c) {
-entry:
-        %tmp.1 = icmp sgt i64 %c, 0             ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 10, i64 %a              ; <i64> [#uses=1]
-        ret i64 %retval
-}
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
deleted file mode 100644
index e88d2ee..0000000
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep cmpbge | count 2
-
-define i1 @test1(i64 %A, i64 %B) {
-        %C = and i64 %A, 255            ; <i64> [#uses=1]
-        %D = and i64 %B, 255            ; <i64> [#uses=1]
-        %E = icmp uge i64 %C, %D                ; <i1> [#uses=1]
-        ret i1 %E
-}
-
-define i1 @test2(i64 %a, i64 %B) {
-        %A = shl i64 %a, 1              ; <i64> [#uses=1]
-        %C = and i64 %A, 254            ; <i64> [#uses=1]
-        %D = and i64 %B, 255            ; <i64> [#uses=1]
-        %E = icmp uge i64 %C, %D                ; <i1> [#uses=1]
-        ret i1 %E
-}
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
deleted file mode 100644
index aa1588a..0000000
--- a/test/CodeGen/Alpha/ctlz.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; Make sure this testcase codegens to the ctlz instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
-
-declare i8 @llvm.ctlz.i8(i8)
-
-define i32 @bar(i8 %x) {
-entry:
-	%tmp.1 = call i8 @llvm.ctlz.i8( i8 %x ) 
-	%tmp.2 = sext i8 %tmp.1 to i32
-	ret i32 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
deleted file mode 100644
index 230e096..0000000
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=alpha | not grep -i ctpop 
-
-declare i64 @llvm.ctlz.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = call i64 @llvm.ctlz.i64( i64 %x )              ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
deleted file mode 100644
index f887882..0000000
--- a/test/CodeGen/Alpha/ctpop.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=+CIX | \
-; RUN:   grep -i ctpop
-; RUN: llc < %s -march=alpha -mcpu=ev6 | \
-; RUN:   not grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=-CIX | \
-; RUN:   not grep -i ctpop
-
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = call i64 @llvm.ctpop.i64( i64 %x )             ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/dg.exp b/test/CodeGen/Alpha/dg.exp
deleted file mode 100644
index fb9f710..0000000
--- a/test/CodeGen/Alpha/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Alpha] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
deleted file mode 100644
index b3413d6..0000000
--- a/test/CodeGen/Alpha/eqv.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the eqv instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = xor i64 %y, %tmp.1             ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
deleted file mode 100644
index 35b1d08..0000000
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
-
-
-define signext i32 @foo(i32 signext %x) {
-entry:
-	%tmp.1 = add i32 %x, -1		; <int> [#uses=1]
-	ret i32 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
deleted file mode 100644
index 4cf80dee..0000000
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
-
-define void @foo() {
-entry:
-        br label %bb
-
-bb:             ; preds = %bb, %entry
-        br i1 false, label %bb26, label %bb
-
-bb19:           ; preds = %bb26
-        ret void
-
-bb26:           ; preds = %bb
-        br i1 false, label %bb30, label %bb19
-
-bb30:           ; preds = %bb26
-        br label %bb45
-
-bb45:           ; preds = %bb45, %bb30
-        %V.0 = phi <8 x i16> [ %tmp42, %bb45 ], [ zeroinitializer, %bb30 ]     ; <<8 x i16>> [#uses=1]
-        %tmp42 = mul <8 x i16> zeroinitializer, %V.0            ; <<8 x i16>> [#uses=1]
-        br label %bb45
-}
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
deleted file mode 100644
index 917c932..0000000
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; try to check that we have the most important instructions, which shouldn't 
-; appear otherwise
-; RUN: llc < %s -march=alpha | grep jmp
-; RUN: llc < %s -march=alpha | grep gprel32
-; RUN: llc < %s -march=alpha | grep ldl
-; RUN: llc < %s -march=alpha | grep rodata
-; END.
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str = internal constant [2 x i8] c"1\00"               ; <[2 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"2\00"              ; <[2 x i8]*> [#uses=1]
-@str2 = internal constant [2 x i8] c"3\00"              ; <[2 x i8]*> [#uses=1]
-@str3 = internal constant [2 x i8] c"4\00"              ; <[2 x i8]*> [#uses=1]
-@str4 = internal constant [2 x i8] c"5\00"              ; <[2 x i8]*> [#uses=1]
-@str5 = internal constant [2 x i8] c"6\00"              ; <[2 x i8]*> [#uses=1]
-@str6 = internal constant [2 x i8] c"7\00"              ; <[2 x i8]*> [#uses=1]
-@str7 = internal constant [2 x i8] c"8\00"              ; <[2 x i8]*> [#uses=1]
-
-define i32 @main(i32 %x, i8** %y) {
-entry:
-        %x_addr = alloca i32            ; <i32*> [#uses=2]
-        %y_addr = alloca i8**           ; <i8***> [#uses=1]
-        %retval = alloca i32, align 4           ; <i32*> [#uses=2]
-        %tmp = alloca i32, align 4              ; <i32*> [#uses=2]
-        %foo = alloca i8*, align 8              ; <i8**> [#uses=9]
-        %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-        store i32 %x, i32* %x_addr
-        store i8** %y, i8*** %y_addr
-        %tmp.upgrd.1 = load i32* %x_addr                ; <i32> [#uses=1]
-        switch i32 %tmp.upgrd.1, label %bb15 [
-                 i32 1, label %bb
-                 i32 2, label %bb1
-                 i32 3, label %bb3
-                 i32 4, label %bb5
-                 i32 5, label %bb7
-                 i32 6, label %bb9
-                 i32 7, label %bb11
-                 i32 8, label %bb13
-        ]
-
-bb:             ; preds = %entry
-        %tmp.upgrd.2 = getelementptr [2 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
-        store i8* %tmp.upgrd.2, i8** %foo
-        br label %bb16
-
-bb1:            ; preds = %entry
-        %tmp2 = getelementptr [2 x i8]* @str1, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp2, i8** %foo
-        br label %bb16
-
-bb3:            ; preds = %entry
-        %tmp4 = getelementptr [2 x i8]* @str2, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp4, i8** %foo
-        br label %bb16
-
-bb5:            ; preds = %entry
-        %tmp6 = getelementptr [2 x i8]* @str3, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp6, i8** %foo
-        br label %bb16
-
-bb7:            ; preds = %entry
-        %tmp8 = getelementptr [2 x i8]* @str4, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp8, i8** %foo
-        br label %bb16
-
-bb9:            ; preds = %entry
-        %tmp10 = getelementptr [2 x i8]* @str5, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp10, i8** %foo
-        br label %bb16
-
-bb11:           ; preds = %entry
-        %tmp12 = getelementptr [2 x i8]* @str6, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp12, i8** %foo
-        br label %bb16
-
-bb13:           ; preds = %entry
-        %tmp14 = getelementptr [2 x i8]* @str7, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp14, i8** %foo
-        br label %bb16
-
-bb15:           ; preds = %entry
-        br label %bb16
-
-bb16:           ; preds = %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1, %bb
-        %tmp17 = load i8** %foo         ; <i8*> [#uses=1]
-        %tmp18 = call i32 (...)* @print( i8* %tmp17 )           ; <i32> [#uses=0]
-        store i32 0, i32* %tmp
-        %tmp19 = load i32* %tmp         ; <i32> [#uses=1]
-        store i32 %tmp19, i32* %retval
-        br label %return
-
-return:         ; preds = %bb16
-        %retval.upgrd.3 = load i32* %retval             ; <i32> [#uses=1]
-        ret i32 %retval.upgrd.3
-}
-
-declare i32 @print(...)
-
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
deleted file mode 100644
index 3268c54..0000000
--- a/test/CodeGen/Alpha/mb.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha | grep mb
-
-define void @test() {
-	fence seq_cst
-	ret void
-}
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
deleted file mode 100644
index daf8409..0000000
--- a/test/CodeGen/Alpha/mul128.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
-entry:
-        %r = mul i128 %a, %b
-        ret i128 %r
-}
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
deleted file mode 100644
index 4075dd6..0000000
--- a/test/CodeGen/Alpha/mul5.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Make sure this testcase does not use mulq
-; RUN: llc < %s -march=alpha | not grep -i mul
-
-define i64 @foo1(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 9          ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo3(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 259                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo4l(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 260                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo8l(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 768                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 5          ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
deleted file mode 100644
index 0db767f..0000000
--- a/test/CodeGen/Alpha/neg1.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llc < %s -march=alpha | grep {\\-1}
-
-define i64 @bar() {
-entry:
-	ret i64 -1
-}
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
deleted file mode 100644
index 4f0a5c2..0000000
--- a/test/CodeGen/Alpha/not.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
deleted file mode 100644
index f930e34..0000000
--- a/test/CodeGen/Alpha/ornot.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep ornot
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = or i64 %y, %tmp.1              ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
deleted file mode 100644
index f8d3094..0000000
--- a/test/CodeGen/Alpha/private.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Test to make sure that the 'private' is used correctly.
-;
-; RUN: llc < %s -march=alpha > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep bsr.*\\\$\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep ldah.*\\\$baz %t
-
-define private void @foo() {
-        ret void
-}
-
-@baz = private global i32 4
-
-define i32 @bar() {
-        call void @foo()
-	%1 = load i32* @baz, align 4
-        ret i32 %1
-}
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
deleted file mode 100644
index d6665b5..0000000
--- a/test/CodeGen/Alpha/rpcc.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=alpha | grep rpcc
-
-declare i64 @llvm.readcyclecounter()
-
-define i64 @foo() {
-entry:
-        %tmp.1 = call i64 @llvm.readcyclecounter( )             ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
deleted file mode 100644
index 3042ef3..0000000
--- a/test/CodeGen/Alpha/srl_and.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
-entry:
-        %tmp = lshr i64 %y, 3           ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 8191              ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
deleted file mode 100644
index d26404b..0000000
--- a/test/CodeGen/Alpha/sub128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for SUBC and SUBE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @sub128(i128 %x, i128 %y) {
-entry:
-	%tmp = sub i128 %y, %x
-	ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
deleted file mode 100644
index ff04de9..0000000
--- a/test/CodeGen/Alpha/weak.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep .weak.*f
-; RUN: llc < %s -march=alpha | grep .weak.*h
-
-define weak i32 @f() {
-entry:
-        unreachable
-}
-
-define void @g() {
-entry:
-        tail call void @h( )
-        ret void
-}
-
-declare extern_weak void @h()
-
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
deleted file mode 100644
index a47035e..0000000
--- a/test/CodeGen/Alpha/zapnot.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-
-define zeroext i16 @foo(i64 %y)  {
-entry:
-        %tmp.1 = trunc i64 %y to i16         ; <ushort> [#uses=1]
-        ret i16 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
deleted file mode 100644
index cd3caae..0000000
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = and i64 %x, 16711935           ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
deleted file mode 100644
index f02961f..0000000
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-;demanded bits mess up this mask in a hard to fix way
-;define i64 @foo(i64 %y) {
-;        %tmp = and i64 %y,  65535
-;        %tmp2 = shr i64 %tmp,  i8 3
-;        ret i64 %tmp2
-;}
-
-define i64 @foo2(i64 %y) {
-        %tmp = lshr i64 %y, 3           ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 8191              ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
deleted file mode 100644
index 89beeef..0000000
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
-        %tmp = shl i64 %y, 3            ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 65535             ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
deleted file mode 100644
index 50fccb4..0000000
--- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy
-
-; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
-; super-register is illegally extended.
-
-define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
-  %y1 = add i16 %x1, 1
-  %y2 = add i16 %x2, 2
-  %y3 = add i16 %x3, 3
-  %y4 = add i16 %x4, 4
-  %z12 = add i16 %y1, %y2
-  %z34 = add i16 %y3, %y4
-  %p = add i16 %z12, %z34
-  ret i16 %p
-}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
deleted file mode 100644
index e5d1637..0000000
--- a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
-
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
-
-define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.cttz.i8(i8 %A)		; <i8> [#uses=1]
-	%b = call i16 @llvm.cttz.i16(i16 %B)		; <i16> [#uses=1]
-	%d = call i64 @llvm.cttz.i64(i64 %D)		; <i64> [#uses=1]
-	store i8 %a, i8* %AP
-	store i16 %b, i16* %BP
-	store i64 %d, i64* %DP
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
deleted file mode 100644
index 0b731dc..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; When joining live intervals of sub-registers, an MBB live-in list is not
-; updated properly. The register scavenger asserts on an undefined register.
-
-define i32 @foo(i8 %bar) {
-entry:
-  switch i8 %bar, label %bb1203 [
-    i8 117, label %bb1204
-    i8 85, label %bb1204
-    i8 106, label %bb1204
-  ]
-
-bb1203:                                           ; preds = %entry
-  ret i32 1
-
-bb1204:                                           ; preds = %entry, %entry, %entry
-  ret i32 2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
deleted file mode 100644
index dcc3ea0..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
-; argument registers. This then causes register scavenger asserts.
-
-declare i32 @printf(i8*, i32, float)
-
-define i32 @testissue(i32 %i, float %x, float %y) {
-  br label %bb1
-
-bb1:                                              ; preds = %bb1, %0
-  %x2 = fmul float %x, 5.000000e-01               ; <float> [#uses=1]
-  %y2 = fmul float %y, 0x3FECCCCCC0000000         ; <float> [#uses=1]
-  %z2 = fadd float %x2, %y2                       ; <float> [#uses=1]
-  %z3 = fadd float undef, %z2                     ; <float> [#uses=1]
-  %i1 = shl i32 %i, 3                             ; <i32> [#uses=1]
-  %j1 = add i32 %i, 7                             ; <i32> [#uses=1]
-  %m1 = add i32 %i1, %j1                          ; <i32> [#uses=2]
-  %b = icmp sle i32 %m1, 6                        ; <i1> [#uses=1]
-  br i1 %b, label %bb1, label %bb2
-
-bb2:                                              ; preds = %bb1
-  %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
-  ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
deleted file mode 100644
index b6cd2d4..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; An undef argument causes a setugt node to escape instruction selection.
-
-define void @bugt() {
-cond_next305:
-  %tmp306307 = trunc i32 undef to i8              ; <i8> [#uses=1]
-  %tmp308 = icmp ugt i8 %tmp306307, 6             ; <i1> [#uses=1]
-  br i1 %tmp308, label %bb311, label %bb314
-
-bb311:                                            ; preds = %cond_next305
-  unreachable
-
-bb314:                                            ; preds = %cond_next305
-  ret void
-}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
deleted file mode 100644
index 8dcf3f8..0000000
--- a/test/CodeGen/Blackfin/add-overflow.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-	%0 = type { i24, i1 }		; type %0
-
-define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
-entry:
-	%t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2)		; <%0> [#uses=1]
-	%obit = extractvalue %0 %t, 1		; <i1> [#uses=1]
-	br i1 %obit, label %carry, label %normal
-
-normal:		; preds = %entry
-	ret i1 true
-
-carry:		; preds = %entry
-	ret i1 false
-}
-
-declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
deleted file mode 100644
index 3311c03..0000000
--- a/test/CodeGen/Blackfin/add.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @add(i32 %A, i32 %B) {
-	%R = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
deleted file mode 100644
index dd56101..0000000
--- a/test/CodeGen/Blackfin/addsub-i128.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; These functions have just the right size to annoy the register scavenger: They
-; use all the scratch registers, but not all the callee-saved registers.
-
-define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
-	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
-	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
-	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
-	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
-	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
-	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
-	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
-	%tmp15 = add i128 %tmp12, %tmp5		; <i128> [#uses=2]
-	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
-	store i64 %tmp1617, i64* %RL
-	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
-	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
-	store i64 %tmp2122, i64* %RH
-	ret void
-}
-
-define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
-	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
-	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
-	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
-	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
-	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
-	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
-	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
-	%tmp15 = sub i128 %tmp5, %tmp12		; <i128> [#uses=2]
-	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
-	store i64 %tmp1617, i64* %RL
-	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
-	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
-	store i64 %tmp2122, i64* %RH
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
deleted file mode 100644
index c63adab..0000000
--- a/test/CodeGen/Blackfin/basic-i1.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @add(i1 %A, i1 %B) {
-	%R = add i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @sub(i1 %A, i1 %B) {
-	%R = sub i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @mul(i1 %A, i1 %B) {
-	%R = mul i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @sdiv(i1 %A, i1 %B) {
-	%R = sdiv i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @udiv(i1 %A, i1 %B) {
-	%R = udiv i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @srem(i1 %A, i1 %B) {
-	%R = srem i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @urem(i1 %A, i1 %B) {
-	%R = urem i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @and(i1 %A, i1 %B) {
-	%R = and i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @or(i1 %A, i1 %B) {
-	%R = or i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @xor(i1 %A, i1 %B) {
-	%R = xor i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
deleted file mode 100644
index 541e9a8..0000000
--- a/test/CodeGen/Blackfin/basic-i16.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @add(i16 %A, i16 %B) {
-	%R = add i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @sub(i16 %A, i16 %B) {
-	%R = sub i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @mul(i16 %A, i16 %B) {
-	%R = mul i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @sdiv(i16 %A, i16 %B) {
-	%R = sdiv i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @udiv(i16 %A, i16 %B) {
-	%R = udiv i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @srem(i16 %A, i16 %B) {
-	%R = srem i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @urem(i16 %A, i16 %B) {
-	%R = urem i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
deleted file mode 100644
index 4b5dbfc..0000000
--- a/test/CodeGen/Blackfin/basic-i32.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @add(i32 %A, i32 %B) {
-	%R = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @sub(i32 %A, i32 %B) {
-	%R = sub i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @mul(i32 %A, i32 %B) {
-	%R = mul i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @sdiv(i32 %A, i32 %B) {
-	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @udiv(i32 %A, i32 %B) {
-	%R = udiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @srem(i32 %A, i32 %B) {
-	%R = srem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @urem(i32 %A, i32 %B) {
-	%R = urem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @and(i32 %A, i32 %B) {
-	%R = and i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @or(i32 %A, i32 %B) {
-	%R = or i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @xor(i32 %A, i32 %B) {
-	%R = xor i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
deleted file mode 100644
index d4dd8e2..0000000
--- a/test/CodeGen/Blackfin/basic-i64.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i64 @add(i64 %A, i64 %B) {
-	%R = add i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @sub(i64 %A, i64 %B) {
-	%R = sub i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @mul(i64 %A, i64 %B) {
-	%R = mul i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @sdiv(i64 %A, i64 %B) {
-	%R = sdiv i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @udiv(i64 %A, i64 %B) {
-	%R = udiv i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @srem(i64 %A, i64 %B) {
-	%R = srem i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @urem(i64 %A, i64 %B) {
-	%R = urem i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @and(i64 %A, i64 %B) {
-	%R = and i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @or(i64 %A, i64 %B) {
-	%R = or i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @xor(i64 %A, i64 %B) {
-	%R = xor i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
deleted file mode 100644
index 2c7ce9d..0000000
--- a/test/CodeGen/Blackfin/basic-i8.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i8 @add(i8 %A, i8 %B) {
-	%R = add i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @sub(i8 %A, i8 %B) {
-	%R = sub i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @mul(i8 %A, i8 %B) {
-	%R = mul i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @sdiv(i8 %A, i8 %B) {
-	%R = sdiv i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @udiv(i8 %A, i8 %B) {
-	%R = udiv i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @srem(i8 %A, i8 %B) {
-	%R = srem i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @urem(i8 %A, i8 %B) {
-	%R = urem i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @and(i8 %A, i8 %B) {
-	%R = and i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @or(i8 %A, i8 %B) {
-	%R = or i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @xor(i8 %A, i8 %B) {
-	%R = xor i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
deleted file mode 100644
index 85040df..0000000
--- a/test/CodeGen/Blackfin/basictest.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define void @void(i32, i32) {
-        add i32 0, 0            ; <i32>:3 [#uses=2]
-        sub i32 0, 4            ; <i32>:4 [#uses=2]
-        br label %5
-
-; <label>:5             ; preds = %5, %2
-        add i32 %0, %1          ; <i32>:6 [#uses=2]
-        sub i32 %6, %4          ; <i32>:7 [#uses=1]
-        icmp sle i32 %7, %3             ; <i1>:8 [#uses=1]
-        br i1 %8, label %9, label %5
-
-; <label>:9             ; preds = %5
-        add i32 %0, %1          ; <i32>:10 [#uses=0]
-        sub i32 %6, %4          ; <i32>:11 [#uses=1]
-        icmp sle i32 %11, %3            ; <i1>:12 [#uses=0]
-        ret void
-}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
deleted file mode 100644
index e1732a8..0000000
--- a/test/CodeGen/Blackfin/cmp-small-imm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @cmp3(i32 %A) {
-	%R = icmp uge i32 %A, 2
-	ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
deleted file mode 100644
index 6c4f9c5..0000000
--- a/test/CodeGen/Blackfin/cmp64.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; This test tries to use a JustCC register as a data operand for MOVEcc.  It
-; copies (JustCC -> DP), failing because JustCC can only be copied to D.
-; The proper solution would be to restrict the virtual register to D only.
-
-define i32 @main() {
-entry:
-	br label %loopentry
-
-loopentry:
-	%done = icmp sle i64 undef, 5
-	br i1 %done, label %loopentry, label %exit.1
-
-exit.1:
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
deleted file mode 100644
index 363286d..0000000
--- a/test/CodeGen/Blackfin/ct32.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
-declare i32 @llvm.ctpop.i32(i32)
-
-define i32 @ctlztest(i32 %B) {
-	%b = call i32 @llvm.ctlz.i32( i32 %B )
-	ret i32 %b
-}
-
-define i32 @cttztest(i32 %B) {
-	%b = call i32 @llvm.cttz.i32( i32 %B )
-	ret i32 %b
-}
-
-define i32 @ctpoptest(i32 %B) {
-	%b = call i32 @llvm.ctpop.i32( i32 %B )
-	ret i32 %b
-}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
deleted file mode 100644
index 7502434..0000000
--- a/test/CodeGen/Blackfin/ct64.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i64 @llvm.ctlz.i64(i64)
-declare i64 @llvm.cttz.i64(i64)
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @ctlztest(i64 %B) {
-	%b = call i64 @llvm.ctlz.i64( i64 %B )
-	ret i64 %b
-}
-
-define i64 @cttztest(i64 %B) {
-	%b = call i64 @llvm.cttz.i64( i64 %B )
-	ret i64 %b
-}
-
-define i64 @ctpoptest(i64 %B) {
-	%b = call i64 @llvm.ctpop.i64( i64 %B )
-	ret i64 %b
-}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
deleted file mode 100644
index eb4af23..0000000
--- a/test/CodeGen/Blackfin/ctlz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctlz.i16(i16)
-
-define i16 @ctlztest(i16 %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @ctlztest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @ctlztest_s(i16 signext %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
deleted file mode 100644
index 3e22f88..0000000
--- a/test/CodeGen/Blackfin/ctlz64.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-@.str = external constant [14 x i8]		; <[14 x i8]*> [#uses=1]
-
-define i32 @main(i64 %arg) nounwind {
-entry:
-	%tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg)		; <i64> [#uses=1]
-	%tmp48 = trunc i64 %tmp47 to i32		; <i32> [#uses=1]
-	%tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind		; <i32> [#uses=0]
-	ret i32 0
-}
-
-declare i32 @printf(i8* noalias, ...) nounwind
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
deleted file mode 100644
index 8b6c07e..0000000
--- a/test/CodeGen/Blackfin/ctpop16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctpop.i16(i16)
-
-define i16 @ctpoptest(i16 %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @ctpoptest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @ctpoptest_s(i16 signext %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
deleted file mode 100644
index 510882a..0000000
--- a/test/CodeGen/Blackfin/cttz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.cttz.i16(i16)
-
-define i16 @cttztest(i16 %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @cttztest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @cttztest_s(i16 signext %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
deleted file mode 100644
index 6451c74..0000000
--- a/test/CodeGen/Blackfin/cycles.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-declare i64 @llvm.readcyclecounter()
-
-; CHECK: cycles
-; CHECK: cycles2
-define i64 @cyc64() {
-	%tmp.1 = call i64 @llvm.readcyclecounter()
-	ret i64 %tmp.1
-}
-
-; CHECK: cycles
-define i32@cyc32() {
-	%tmp.1 = call i64 @llvm.readcyclecounter()
-        %s = trunc i64 %tmp.1 to i32
-	ret i32 %s
-}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
deleted file mode 100644
index 5fdbe5f..0000000
--- a/test/CodeGen/Blackfin/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Blackfin] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
deleted file mode 100644
index 815ca79..0000000
--- a/test/CodeGen/Blackfin/double-cast.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-	%1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
deleted file mode 100644
index 7e677fb..0000000
--- a/test/CodeGen/Blackfin/frameindex.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
-
-define void @foo() {
-bb0:
-	%V = alloca [256 x i32], i32 256		; <[256 x i32]*> [#uses=1]
-	%0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2)		; <i32> [#uses=0]
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
deleted file mode 100644
index bc5ade7..0000000
--- a/test/CodeGen/Blackfin/i17mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17		; <i17*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind  {
-	%tmp = load i17* @i17_l		; <i17> [#uses=1]
-	store i17 %tmp, i17* @i17_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
deleted file mode 100644
index cb03e3d..0000000
--- a/test/CodeGen/Blackfin/i1mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i1_l = external global i1		; <i1*> [#uses=1]
-@i1_s = external global i1		; <i1*> [#uses=1]
-
-define void @i1_ls() nounwind  {
-	%tmp = load i1* @i1_l		; <i1> [#uses=1]
-	store i1 %tmp, i1* @i1_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
deleted file mode 100644
index 6b5612c..0000000
--- a/test/CodeGen/Blackfin/i1ops.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @adj(i32 %d.1, i32 %ct.1) {
-entry:
-	%tmp.22.not = trunc i32 %ct.1 to i1		; <i1> [#uses=1]
-	%tmp.221 = xor i1 %tmp.22.not, true		; <i1> [#uses=1]
-	%tmp.26 = or i1 false, %tmp.221		; <i1> [#uses=1]
-	%tmp.27 = zext i1 %tmp.26 to i32		; <i32> [#uses=1]
-	ret i32 %tmp.27
-}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
deleted file mode 100644
index 9f8cf48..0000000
--- a/test/CodeGen/Blackfin/i216mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i216_l = external global i216		; <i216*> [#uses=1]
-@i216_s = external global i216		; <i216*> [#uses=1]
-
-define void @i216_ls() nounwind  {
-	%tmp = load i216* @i216_l		; <i216> [#uses=1]
-	store i216 %tmp, i216* @i216_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
deleted file mode 100644
index db23f54..0000000
--- a/test/CodeGen/Blackfin/i248mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin
-@i248_l = external global i248		; <i248*> [#uses=1]
-@i248_s = external global i248		; <i248*> [#uses=1]
-
-define void @i248_ls() nounwind  {
-	%tmp = load i248* @i248_l		; <i248> [#uses=1]
-	store i248 %tmp, i248* @i248_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
deleted file mode 100644
index bc5ade7..0000000
--- a/test/CodeGen/Blackfin/i256mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17		; <i17*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind  {
-	%tmp = load i17* @i17_l		; <i17> [#uses=1]
-	store i17 %tmp, i17* @i17_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
deleted file mode 100644
index df74c9a..0000000
--- a/test/CodeGen/Blackfin/i256param.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i256_s = external global i256		; <i256*> [#uses=1]
-
-define void @i256_ls(i256 %x) nounwind  {
-	store i256 %x, i256* @i256_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
deleted file mode 100644
index ca02563..0000000
--- a/test/CodeGen/Blackfin/i56param.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i56_l = external global i56		; <i56*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-
-define void @i56_ls(i56 %x) nounwind  {
-	store i56 %x, i56* @i56_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
deleted file mode 100644
index ea3a67e..0000000
--- a/test/CodeGen/Blackfin/i8mem.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-@i8_l = external global i8		; <i8*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-
-define void @i8_ls() nounwind  {
-	%tmp = load i8* @i8_l		; <i8> [#uses=1]
-	store i8 %tmp, i8* @i8_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
deleted file mode 100644
index d623f6b..0000000
--- a/test/CodeGen/Blackfin/inline-asm.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-; Standard "r"
-; CHECK: r0 = r0 + r1;
-define i32 @add_r(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "d"
-; CHECK: r0 = r0 - r1;
-define i32 @add_d(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "a" for P-regs
-; CHECK: p0 = (p0 + p1) << 1;
-define i32 @add_a(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "z" for P0, P1, P2. This is not a real regclass
-; CHECK: p0 = (p0 + p1) << 2;
-define i32 @add_Z(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "C" for CC. This is a single register
-; CHECK: cc = p0 < p1;
-; CHECK: r0 = cc;
-define i32 @add_C(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
deleted file mode 100644
index 6bd9f86..0000000
--- a/test/CodeGen/Blackfin/int-setcc.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define fastcc void @Evaluate() {
-entry:
-	br i1 false, label %cond_false186, label %cond_true
-
-cond_true:		; preds = %entry
-	ret void
-
-cond_false186:		; preds = %entry
-	br i1 false, label %cond_true293, label %bb203
-
-bb203:		; preds = %cond_false186
-	ret void
-
-cond_true293:		; preds = %cond_false186
-	br i1 false, label %cond_true298, label %cond_next317
-
-cond_true298:		; preds = %cond_true293
-	br i1 false, label %cond_next518, label %cond_true397.preheader
-
-cond_next317:		; preds = %cond_true293
-	ret void
-
-cond_true397.preheader:		; preds = %cond_true298
-	ret void
-
-cond_next518:		; preds = %cond_true298
-	br i1 false, label %bb1069, label %cond_true522
-
-cond_true522:		; preds = %cond_next518
-	ret void
-
-bb1069:		; preds = %cond_next518
-	br i1 false, label %cond_next1131, label %bb1096
-
-bb1096:		; preds = %bb1069
-	ret void
-
-cond_next1131:		; preds = %bb1069
-	br i1 false, label %cond_next1207, label %cond_true1150
-
-cond_true1150:		; preds = %cond_next1131
-	ret void
-
-cond_next1207:		; preds = %cond_next1131
-	br i1 false, label %cond_next1219, label %cond_true1211
-
-cond_true1211:		; preds = %cond_next1207
-	ret void
-
-cond_next1219:		; preds = %cond_next1207
-	br i1 false, label %cond_true1223, label %cond_next1283
-
-cond_true1223:		; preds = %cond_next1219
-	br i1 false, label %cond_true1254, label %cond_true1264
-
-cond_true1254:		; preds = %cond_true1223
-	br i1 false, label %bb1567, label %cond_true1369.preheader
-
-cond_true1264:		; preds = %cond_true1223
-	ret void
-
-cond_next1283:		; preds = %cond_next1219
-	ret void
-
-cond_true1369.preheader:		; preds = %cond_true1254
-	ret void
-
-bb1567:		; preds = %cond_true1254
-	%tmp1605 = load i8* null		; <i8> [#uses=1]
-	%tmp1606 = icmp eq i8 %tmp1605, 0		; <i1> [#uses=1]
-	br i1 %tmp1606, label %cond_next1637, label %cond_true1607
-
-cond_true1607:		; preds = %bb1567
-	ret void
-
-cond_next1637:		; preds = %bb1567
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
deleted file mode 100644
index a8c01ba..0000000
--- a/test/CodeGen/Blackfin/invalid-apint.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
-; function trunc, file APInt.cpp, line 956.
-
-@str2 = external global [29 x i8]
-
-define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
-entry:
-	%tmp17 = sext i8 %a13 to i32
-	%tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
-	ret void
-}
-
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
deleted file mode 100644
index 263533c..0000000
--- a/test/CodeGen/Blackfin/jumptable.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-; CHECK: .section .rodata
-; CHECK: JTI0_0:
-; CHECK: .long .BB0_1
-
-define i32 @oper(i32 %op, i32 %A, i32 %B) {
-entry:
-        switch i32 %op, label %bbx [
-               i32 1 , label %bb1
-               i32 2 , label %bb2
-               i32 3 , label %bb3
-               i32 4 , label %bb4
-               i32 5 , label %bb5
-               i32 6 , label %bb6
-               i32 7 , label %bb7
-               i32 8 , label %bb8
-               i32 9 , label %bb9
-               i32 10, label %bb10
-        ]
-bb1:
-	%R1 = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R1
-bb2:
-	%R2 = sub i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R2
-bb3:
-	%R3 = mul i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R3
-bb4:
-	%R4 = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R4
-bb5:
-	%R5 = udiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R5
-bb6:
-	%R6 = srem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R6
-bb7:
-	%R7 = urem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R7
-bb8:
-	%R8 = and i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R8
-bb9:
-	%R9 = or i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R9
-bb10:
-	%R10 = xor i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R10
-bbx:
-        ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
deleted file mode 100644
index 02d32ef..0000000
--- a/test/CodeGen/Blackfin/large-switch.ll
+++ /dev/null
@@ -1,187 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; The switch expansion uses a dynamic shl, and it produces a jumptable
-
-define void @athlon_fp_unit_ready_cost() {
-entry:
-	switch i32 0, label %UnifiedReturnBlock [
-		i32 -1, label %bb2063
-		i32 19, label %bb2035
-		i32 20, label %bb2035
-		i32 21, label %bb2035
-		i32 23, label %bb2035
-		i32 24, label %bb2035
-		i32 27, label %bb2035
-		i32 32, label %bb2035
-		i32 33, label %bb1994
-		i32 35, label %bb2035
-		i32 36, label %bb1994
-		i32 90, label %bb1948
-		i32 94, label %bb1948
-		i32 95, label %bb1948
-		i32 133, label %bb1419
-		i32 135, label %bb1238
-		i32 136, label %bb1238
-		i32 137, label %bb1238
-		i32 138, label %bb1238
-		i32 139, label %bb1201
-		i32 140, label %bb1201
-		i32 141, label %bb1154
-		i32 142, label %bb1126
-		i32 144, label %bb1201
-		i32 145, label %bb1126
-		i32 146, label %bb1201
-		i32 147, label %bb1126
-		i32 148, label %bb1201
-		i32 149, label %bb1126
-		i32 150, label %bb1201
-		i32 151, label %bb1126
-		i32 152, label %bb1096
-		i32 153, label %bb1096
-		i32 154, label %bb1096
-		i32 157, label %bb1096
-		i32 158, label %bb1096
-		i32 159, label %bb1096
-		i32 162, label %bb1096
-		i32 163, label %bb1096
-		i32 164, label %bb1096
-		i32 167, label %bb1201
-		i32 168, label %bb1201
-		i32 170, label %bb1201
-		i32 171, label %bb1201
-		i32 173, label %bb1201
-		i32 174, label %bb1201
-		i32 176, label %bb1201
-		i32 177, label %bb1201
-		i32 179, label %bb993
-		i32 180, label %bb993
-		i32 181, label %bb993
-		i32 182, label %bb993
-		i32 183, label %bb993
-		i32 184, label %bb993
-		i32 365, label %bb1126
-		i32 366, label %bb1126
-		i32 367, label %bb1126
-		i32 368, label %bb1126
-		i32 369, label %bb1126
-		i32 370, label %bb1126
-		i32 371, label %bb1126
-		i32 372, label %bb1126
-		i32 373, label %bb1126
-		i32 384, label %bb1126
-		i32 385, label %bb1126
-		i32 386, label %bb1126
-		i32 387, label %bb1126
-		i32 388, label %bb1126
-		i32 389, label %bb1126
-		i32 390, label %bb1126
-		i32 391, label %bb1126
-		i32 392, label %bb1126
-		i32 525, label %bb919
-		i32 526, label %bb839
-		i32 528, label %bb919
-		i32 529, label %bb839
-		i32 532, label %cond_next6.i97
-		i32 533, label %cond_next6.i81
-		i32 534, label %bb495
-		i32 536, label %cond_next6.i81
-		i32 537, label %cond_next6.i81
-		i32 538, label %bb396
-		i32 539, label %bb288
-		i32 541, label %bb396
-		i32 542, label %bb396
-		i32 543, label %bb396
-		i32 544, label %bb396
-		i32 545, label %bb189
-		i32 546, label %cond_next6.i
-		i32 547, label %bb189
-		i32 548, label %cond_next6.i
-		i32 549, label %bb189
-		i32 550, label %cond_next6.i
-		i32 551, label %bb189
-		i32 552, label %cond_next6.i
-		i32 553, label %bb189
-		i32 554, label %cond_next6.i
-		i32 555, label %bb189
-		i32 556, label %cond_next6.i
-		i32 557, label %bb189
-		i32 558, label %cond_next6.i
-		i32 618, label %bb40
-		i32 619, label %bb18
-		i32 620, label %bb40
-		i32 621, label %bb10
-		i32 622, label %bb10
-	]
-
-bb10:
-	ret void
-
-bb18:
-	ret void
-
-bb40:
-	ret void
-
-cond_next6.i:
-	ret void
-
-bb189:
-	ret void
-
-bb288:
-	ret void
-
-bb396:
-	ret void
-
-bb495:
-	ret void
-
-cond_next6.i81:
-	ret void
-
-cond_next6.i97:
-	ret void
-
-bb839:
-	ret void
-
-bb919:
-	ret void
-
-bb993:
-	ret void
-
-bb1096:
-	ret void
-
-bb1126:
-	ret void
-
-bb1154:
-	ret void
-
-bb1201:
-	ret void
-
-bb1238:
-	ret void
-
-bb1419:
-	ret void
-
-bb1948:
-	ret void
-
-bb1994:
-	ret void
-
-bb2035:
-	ret void
-
-bb2063:
-	ret void
-
-UnifiedReturnBlock:
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
deleted file mode 100644
index eb18d41..0000000
--- a/test/CodeGen/Blackfin/load-i16.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; This somewhat contrived function heavily exercises register classes
-; It can trick -join-cross-class-copies into making illegal joins
-
-define void @f(i16** nocapture %p) nounwind readonly {
-entry:
-	%tmp1 = load i16** %p		; <i16*> [#uses=1]
-	%tmp2 = load i16* %tmp1		; <i16> [#uses=1]
-	%ptr = getelementptr i16* %tmp1, i16 %tmp2
-    store i16 %tmp2, i16* %ptr
-    ret void
-}
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
deleted file mode 100644
index e44672f..0000000
--- a/test/CodeGen/Blackfin/logic-i16.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @and(i16 %A, i16 %B) {
-	%R = and i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @or(i16 %A, i16 %B) {
-	%R = or i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @xor(i16 %A, i16 %B) {
-	%R = xor i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
deleted file mode 100644
index 2df32ca..0000000
--- a/test/CodeGen/Blackfin/many-args.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-	%0 = type { i32, float, float, float, float, float, float, float, float, float, float }		; type %0
-	%struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
-	%tmp.218 = load float* null		; <float> [#uses=1]
-	%tmp.219 = getelementptr %0* null, i64 0, i32 6		; <float*> [#uses=1]
-	%tmp.220 = load float* %tmp.219		; <float> [#uses=1]
-	%tmp.221 = getelementptr %0* null, i64 0, i32 7		; <float*> [#uses=1]
-	%tmp.222 = load float* %tmp.221		; <float> [#uses=1]
-	%tmp.223 = getelementptr %0* null, i64 0, i32 8		; <float*> [#uses=1]
-	%tmp.224 = load float* %tmp.223		; <float> [#uses=1]
-	%tmp.225 = getelementptr %0* null, i64 0, i32 9		; <float*> [#uses=1]
-	%tmp.226 = load float* %tmp.225		; <float> [#uses=1]
-	%tmp.227 = getelementptr %0* null, i64 0, i32 10		; <float*> [#uses=1]
-	%tmp.228 = load float* %tmp.227		; <float> [#uses=1]
-	call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
-	ret i32 0
-}
-
-declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
deleted file mode 100644
index 72bacee..0000000
--- a/test/CodeGen/Blackfin/mulhu.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
-	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
-	%struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
-	%struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
-	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
-	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
-	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
-	%struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
-	%struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
-	%struct.def_operand_ptr = type { %struct.tree_node** }
-	%struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
-	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
-	%struct.edge_def_insns = type { %struct.rtx_def* }
-	%struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
-	%struct.eh_status = type opaque
-	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
-	%struct.et_node = type opaque
-	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
-	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
-	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
-	%struct.initial_value_struct = type opaque
-	%struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
-	%struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
-	%struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
-	%struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
-	%struct.lang_decl = type opaque
-	%struct.language_function = type opaque
-	%struct.location_t = type { i8*, i32 }
-	%struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
-	%struct.lpt_decision = type { i32, i32 }
-	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
-	%struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
-	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
-	%struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
-	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
-	%struct.rtx_def = type { i16, i8, i8, %struct.u }
-	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
-	%struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
-	%struct.stack_local_entry = type opaque
-	%struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
-	%struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
-	%struct.temp_slot = type opaque
-	%struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
-	%struct.tree_ann_d = type { %struct.stmt_ann_d }
-	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
-	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
-	%struct.tree_decl_u1 = type { i64 }
-	%struct.tree_decl_u2 = type { %struct.function* }
-	%struct.tree_node = type { %struct.tree_decl }
-	%struct.u = type { [1 x i64] }
-	%struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
-	%struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
-	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
-	%struct.varasm_status = type opaque
-	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
-	%struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
-	%struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
-
-define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
-entry:
-	switch i32 0, label %bb91 [
-		i32 0, label %bb
-		i32 1, label %bb6
-		i32 3, label %cond_next135
-	]
-
-bb:		; preds = %entry
-	ret i1 false
-
-bb6:		; preds = %entry
-	br i1 false, label %bb87, label %cond_next27
-
-cond_next27:		; preds = %bb6
-	br i1 false, label %cond_true30, label %cond_next55
-
-cond_true30:		; preds = %cond_next27
-	br i1 false, label %cond_next41, label %cond_true35
-
-cond_true35:		; preds = %cond_true30
-	ret i1 false
-
-cond_next41:		; preds = %cond_true30
-	%tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null)		; <i32> [#uses=1]
-	%tmp46 = udiv i32 %tmp44, 5		; <i32> [#uses=1]
-	call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
-	br label %bb87
-
-cond_next55:		; preds = %cond_next27
-	ret i1 false
-
-bb87:		; preds = %cond_next41, %bb6
-	ret i1 false
-
-bb91:		; preds = %entry
-	ret i1 false
-
-cond_next135:		; preds = %entry
-	ret i1 false
-}
-
-declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
-
-declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
deleted file mode 100644
index 9e54b73..0000000
--- a/test/CodeGen/Blackfin/printf.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@.str_1 = external constant [42 x i8]		; <[42 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
-	%tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
deleted file mode 100644
index 7ac7e80..0000000
--- a/test/CodeGen/Blackfin/printf2.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-	%1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
deleted file mode 100644
index 1ac1408..0000000
--- a/test/CodeGen/Blackfin/promote-logic.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin 
-
-; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
-; operation after LegalizeOps.
-
-define void @mng_display_bgr565() {
-entry:
-	br i1 false, label %bb.preheader, label %return
-
-bb.preheader:
-	br i1 false, label %cond_true48, label %cond_next80
-
-cond_true48:
-	%tmp = load i8* null
-	%tmp51 = zext i8 %tmp to i16
-	%tmp99 = load i8* null
-	%tmp54 = bitcast i8 %tmp99 to i8
-	%tmp54.upgrd.1 = zext i8 %tmp54 to i32
-	%tmp55 = lshr i32 %tmp54.upgrd.1, 3
-	%tmp55.upgrd.2 = trunc i32 %tmp55 to i16
-	%tmp52 = shl i16 %tmp51, 5
-	%tmp56 = and i16 %tmp55.upgrd.2, 28
-	%tmp57 = or i16 %tmp56, %tmp52
-	%tmp60 = zext i16 %tmp57 to i32
-	%tmp62 = xor i32 0, 65535
-	%tmp63 = mul i32 %tmp60, %tmp62
-	%tmp65 = add i32 0, %tmp63
-	%tmp69 = add i32 0, %tmp65
-	%tmp70 = lshr i32 %tmp69, 16
-	%tmp70.upgrd.3 = trunc i32 %tmp70 to i16
-	%tmp75 = lshr i16 %tmp70.upgrd.3, 8
-	%tmp75.upgrd.4 = trunc i16 %tmp75 to i8
-	%tmp76 = lshr i8 %tmp75.upgrd.4, 5
-	store i8 %tmp76, i8* null
-	ret void
-
-cond_next80:
-	ret void
-
-return:
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
deleted file mode 100644
index d344fad..0000000
--- a/test/CodeGen/Blackfin/promote-setcc.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-; The DAG combiner may sometimes create illegal i16 SETCC operations when run
-; after LegalizeOps. Try to tease out all the optimizations in
-; TargetLowering::SimplifySetCC.
-
-@x = external global i16
-@y = external global i16
-
-declare i16 @llvm.ctlz.i16(i16)
-
-; Case (srl (ctlz x), 5) == const
-; Note: ctlz is promoted, so this test does not catch the DAG combiner
-define i1 @srl_ctlz_const() {
-  %x = load i16* @x
-  %c = call i16 @llvm.ctlz.i16(i16 %x)
-  %s = lshr i16 %c, 4
-  %r = icmp eq i16 %s, 1
-  ret i1 %r
-}
-
-; Case (zext x) == const
-define i1 @zext_const() {
-  %x = load i16* @x
-  %r = icmp ugt i16 %x, 1
-  ret i1 %r
-}
-
-; Case (sext x) == const
-define i1 @sext_const() {
-  %x = load i16* @x
-  %y = add i16 %x, 1
-  %x2 = sext i16 %y to i32
-  %r = icmp ne i32 %x2, -1
-  ret i1 %r
-}
-
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
deleted file mode 100644
index 1426655..0000000
--- a/test/CodeGen/Blackfin/sdiv.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @sdiv(i32 %A, i32 %B) {
-	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
deleted file mode 100644
index 0f7f270..0000000
--- a/test/CodeGen/Blackfin/simple-select.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-declare i1 @foo()
-
-define i32 @test(i32* %A, i32* %B) {
-	%a = load i32* %A
-	%b = load i32* %B
-	%cond = call i1 @foo()
-	%c = select i1 %cond, i32 %a, i32 %b
-	ret i32 %c
-}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
deleted file mode 100644
index 3680ec6..0000000
--- a/test/CodeGen/Blackfin/switch.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i32 @foo(i32 %A, i32 %B, i32 %C) {
-entry:
-	switch i32 %A, label %out [
-		i32 1, label %bb
-		i32 0, label %bb13
-	]
-
-bb:		; preds = %entry
-	ret i32 1
-
-bb13:		; preds = %entry
-	ret i32 1
-
-out:		; preds = %entry
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
deleted file mode 100644
index 7877bce..0000000
--- a/test/CodeGen/Blackfin/switch2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i8* @FindChar(i8* %CurPtr) {
-entry:
-	br label %bb
-
-bb:		; preds = %bb, %entry
-	%tmp = load i8* null		; <i8> [#uses=1]
-	switch i8 %tmp, label %bb [
-		i8 0, label %bb7
-		i8 120, label %bb7
-	]
-
-bb7:		; preds = %bb, %bb
-	ret i8* null
-}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
deleted file mode 100644
index 0b103a3..0000000
--- a/test/CodeGen/Blackfin/sync-intr.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-define void @f() nounwind {
-entry:
-        ; CHECK-NOT: llvm.bfin
-        ; CHECK: csync;
-        call void @llvm.bfin.csync()
-
-        ; CHECK-NOT: llvm.bfin
-        ; CHECK: ssync;
-        call void @llvm.bfin.ssync()
-	ret void
-}
-
-declare void @llvm.bfin.csync() nounwind
-declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
deleted file mode 100644
index 0b06041..0000000
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure that global variables do not collide if they have the same name,
-; but different types.
-
-@X = global i32 5               ; <i32*> [#uses=0]
-@X.upgrd.1 = global i64 7               ; <i64*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
deleted file mode 100644
index a9f54e4..0000000
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This case was emitting code that looked like this:
-; ...
-;   llvm_BB1:       /* no statement here */
-; }
-; 
-; Which the Sun C compiler rejected, so now we are sure to put a return 
-; instruction in there if the basic block is otherwise empty.
-;
-define void @test() {
-        br label %BB1
-
-BB2:            ; preds = %BB2
-        br label %BB2
-
-BB1:            ; preds = %0
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
deleted file mode 100644
index 2afb1a0..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Test const pointer refs & forward references
-
-@t3 = global i32* @t1           ; <i32**> [#uses=0]
-@t1 = global i32 4              ; <i32*> [#uses=1]
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
deleted file mode 100644
index b71cf07..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-global i32* bitcast (float* @2 to i32*)   ;; Forward numeric reference
-global float* @2                       ;; Duplicate forward numeric reference
-global float 0.0
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
deleted file mode 100644
index b5a1f0b..0000000
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@sptr1 = global [11 x i8]* @somestr         ;; Forward ref to a constant
-@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
deleted file mode 100644
index 10b9fe2..0000000
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@fptr = global void ()* @f       ;; Forward ref method defn
-declare void @f()               ;; External method
-
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
deleted file mode 100644
index 0827423..0000000
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]          ; <[2 x i32]*> [#uses=1]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)         ; <i32**> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
deleted file mode 100644
index 59aafd5..0000000
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C Writer bombs on this testcase because it tries the print the prototype
-; for the test function, which tries to print the argument name.  The function
-; has not been incorporated into the slot calculator, so after it does the name
-; lookup, it tries a slot calculator lookup, which fails.
-
-define i32 @test(i32) {
-        ret i32 0
-}
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
deleted file mode 100644
index 6c4d629..0000000
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Indirect function call test... found by Joel & Brian
-;
-
-@taskArray = external global i32*               ; <i32**> [#uses=1]
-
-define void @test(i32 %X) {
-        %Y = add i32 %X, -1             ; <i32> [#uses=1]
-        %cast100 = sext i32 %Y to i64           ; <i64> [#uses=1]
-        %gep100 = getelementptr i32** @taskArray, i64 %cast100          ; <i32**> [#uses=1]
-        %fooPtr = load i32** %gep100            ; <i32*> [#uses=1]
-        %cast101 = bitcast i32* %fooPtr to void (i32)*          ; <void (i32)*> [#uses=1]
-        call void %cast101( i32 1000 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
deleted file mode 100644
index 1187a37..0000000
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase fails because the C backend does not arrange to output the 
-; contents of a structure type before it outputs the structure type itself.
-
-@Y = external global { { i32 } }                ; <{ { i32 } }*> [#uses=0]
-@X = external global { float }          ; <{ float }*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
deleted file mode 100644
index 021adb9..0000000
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @test() {
-        %X = alloca [4 x i32]           ; <[4 x i32]*> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
deleted file mode 100644
index e915cd2..0000000
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c
-
-
-declare void @foo(...)
-
-
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
deleted file mode 100644
index 2cdd15c..0000000
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@bob = external global i32              ; <i32*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
deleted file mode 100644
index 82d594f..0000000
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00"             ; <[18 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-        call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @testString, i64 0, i64 0) )                ; <i32>:1 [#uses=0]
-        ret i32 0
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
deleted file mode 100644
index 92d582d..0000000
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Apparently this constant was unsigned in ISO C 90, but not in C 99.
-
-define i32 @foo() {
-        ret i32 -2147483648
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
deleted file mode 100644
index a42dc27..0000000
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase breaks the C backend, because gcc doesn't like (...) functions
-; with no arguments at all.
-
-define void @test(i64 %Ptr) {
-        %P = inttoptr i64 %Ptr to void (...)*           ; <void (...)*> [#uses=1]
-        call void (...)* %P( i64 %Ptr )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
deleted file mode 100644
index 19c7840..0000000
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C backend was dying when there was no typename for a struct type!
-
-declare i32 @test(i32, { [32 x i32] }*)
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
deleted file mode 100644
index 048e045..0000000
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c
-
-%X = type { i32, float }
-
-define void @test() {
-        getelementptr %X* null, i64 0, i32 1            ; <float*>:1 [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
deleted file mode 100644
index 6197b30..0000000
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure hex constant does not continue into a valid hexadecimal letter/number
-@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
deleted file mode 100644
index f6177ea..0000000
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c
-
-@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
deleted file mode 100644
index f0b1bbc..0000000
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare i32 @callee(i32, i32)
-
-define i32 @test(i32 %X) {
-; <label>:0
-        %A = invoke i32 @callee( i32 %X, i32 5 )
-                        to label %Ok unwind label %Threw                ; <i32> [#uses=1]
-
-Ok:             ; preds = %Threw, %0
-        %B = phi i32 [ %A, %0 ], [ -1, %Threw ]         ; <i32> [#uses=1]
-        ret i32 %B
-
-Threw:          ; preds = %0
-        br label %Ok
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
deleted file mode 100644
index 4bd1da2..0000000
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c | grep common | grep X
-
-@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
deleted file mode 100644
index 0fbb3fe..0000000
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This is a non-normal FP value: it's a nan.
-@NAN = global { float } { float 0x7FF8000000000000 }            ; <{ float }*> [#uses=0]
-@NANs = global { float } { float 0x7FFC000000000000 }           ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
deleted file mode 100644
index 9195634..0000000
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
-
-define void @test(%A*) {
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
deleted file mode 100644
index b4389ff..0000000
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c
-
-; reduced from DOOM.
-        %union._XEvent = type { i32 }
-@.X_event_9 = global %union._XEvent zeroinitializer             ; <%union._XEvent*> [#uses=1]
-
-define void @I_InitGraphics() {
-shortcirc_next.3:
-        %tmp.319 = load i32* getelementptr ({ i32, i32 }* bitcast (%union._XEvent* @.X_event_9 to { i32, i32 }*), i64 0, i32 1)               ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
deleted file mode 100644
index 6a26291..0000000
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-@y = weak global i8 0           ; <i8*> [#uses=1]
-
-define i32 @testcaseshr() {
-entry:
-        ret i32 lshr (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
-define i32 @testcaseshl() {
-entry:
-        ret i32 shl (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
deleted file mode 100644
index 142fbd8..0000000
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=c | grep builtin_return_address
-
-declare i8* @llvm.returnaddress(i32)
-
-declare i8* @llvm.frameaddress(i32)
-
-define i8* @test1() {
-        %X = call i8* @llvm.returnaddress( i32 0 )              ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
-define i8* @test2() {
-        %X = call i8* @llvm.frameaddress( i32 0 )               ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
deleted file mode 100644
index d1c6861..0000000
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; The intrinsic lowering pass was lowering intrinsics like llvm.memcpy to 
-; explicitly specified prototypes, inserting a new function if the old one
-; didn't exist.  This caused there to be two external memcpy functions in 
-; this testcase for example, which caused the CBE to mangle one, screwing
-; everything up.  :(  Test that this does not happen anymore.
-;
-; RUN: llc < %s -march=c | not grep _memcpy
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare float* @memcpy(i32*, i32, i32)
-
-define i32 @test(i8* %A, i8* %B, i32* %C) {
-        call float* @memcpy( i32* %C, i32 4, i32 17 )           ; <float*>:1 [#uses=0]
-        call void @llvm.memcpy.i32( i8* %A, i8* %B, i32 123, i32 14 )
-        ret i32 7
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
deleted file mode 100644
index 6fceb08..0000000
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; This is a non-normal FP value
-; RUN: llc < %s -march=c | grep FPConstant | grep static
-
-define float @func() {
-        ret float 0xFFF0000000000000
-}
-
-define double @func2() {
-        ret double 0xFF20000000000000
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
deleted file mode 100644
index cf59634..0000000
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c | grep func1 | grep WEAK
-
-define linkonce i32 @func1() {
-        ret i32 5
-}
-
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
deleted file mode 100644
index 3ee23d1..0000000
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare void @llvm.va_end(i8*)
-
-define void @test() {
-        %va.upgrd.1 = bitcast i8* null to i8*           ; <i8*> [#uses=1]
-        call void @llvm.va_end( i8* %va.upgrd.1 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
deleted file mode 100644
index af8f441..0000000
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; The CBE should not emit code that casts the function pointer.  This causes
-; GCC to get testy and insert trap instructions instead of doing the right
-; thing. :(
-; RUN: llc < %s -march=c
-
-declare void @external(i8*)
-
-define i32 @test(i32* %X) {
-        %RV = call i32 bitcast (void (i8*)* @external to i32 (i32*)*)( i32* %X )                ; <i32> [#uses=1]
-        ret i32 %RV
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
deleted file mode 100644
index 78e9bac..0000000
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | not grep extern.*msg
-; PR472
-
-@msg = internal global [6 x i8] c"hello\00"             ; <[6 x i8]*> [#uses=1]
-
-define i8* @foo() {
-entry:
-        ret i8* getelementptr ([6 x i8]* @msg, i32 0, i32 0)
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
deleted file mode 100644
index 57a9adc..0000000
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-define i32 @foo() {
-        ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
-}
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
deleted file mode 100644
index dd505af..0000000
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c | grep volatile
-
-define void @test(i32* %P) {
-        %X = volatile load i32* %P              ; <i32> [#uses=1]
-        volatile store i32 %X, i32* %P
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
deleted file mode 100644
index 808b8f9..0000000
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=c | not grep -- --65535
-; PR596
-
-target datalayout = "e-p:32:32"
-target triple = "i686-pc-linux-gnu"
-
-declare void @func(i32)
-
-define void @funcb() {
-entry:
-        %tmp.1 = sub i32 0, -65535              ; <i32> [#uses=1]
-        call void @func( i32 %tmp.1 )
-        br label %return
-
-return:         ; preds = %entry
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
deleted file mode 100644
index 6e650eb..0000000
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c | grep fmod
-
-define double @test(double %A, double %B) {
-        %C = frem double %A, %B         ; <double> [#uses=1]
-        ret double %C
-}
-
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
deleted file mode 100644
index 99de837..0000000
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
-
-@G = external global void ()*           ; <void ()**> [#uses=2]
-
-define void @test() {
-        volatile store void ()* @test, void ()** @G
-        volatile load void ()** @G              ; <void ()*>:1 [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
deleted file mode 100644
index c9df800..0000000
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:   grep __BITCAST | count 14
-
-define i32 @test1(float %F) {
-        %X = bitcast float %F to i32            ; <i32> [#uses=1]
-        ret i32 %X
-}
-
-define float @test2(i32 %I) {
-        %X = bitcast i32 %I to float            ; <float> [#uses=1]
-        ret float %X
-}
-
-define i64 @test3(double %D) {
-        %X = bitcast double %D to i64           ; <i64> [#uses=1]
-        ret i64 %X
-}
-
-define double @test4(i64 %L) {
-        %X = bitcast i64 %L to double           ; <double> [#uses=1]
-        ret double %X
-}
-
-define double @test5(double %D) {
-        %X = bitcast double %D to double                ; <double> [#uses=1]
-        %Y = fadd double %X, 2.000000e+00                ; <double> [#uses=1]
-        %Z = bitcast double %Y to i64           ; <i64> [#uses=1]
-        %res = bitcast i64 %Z to double         ; <double> [#uses=1]
-        ret double %res
-}
-
-define float @test6(float %F) {
-        %X = bitcast float %F to float          ; <float> [#uses=1]
-        %Y = fadd float %X, 2.000000e+00         ; <float> [#uses=1]
-        %Z = bitcast float %Y to i32            ; <i32> [#uses=1]
-        %res = bitcast i32 %Z to float          ; <float> [#uses=1]
-        ret float %res
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
-        %a = call i32 @test1( float 0x400921FB40000000 )                ; <i32> [#uses=2]
-        %b = call float @test2( i32 %a )                ; <float> [#uses=0]
-        %c = call i64 @test3( double 0x400921FB4D12D84A )               ; <i64> [#uses=1]
-        %d = call double @test4( i64 %c )               ; <double> [#uses=0]
-        %e = call double @test5( double 7.000000e+00 )          ; <double> [#uses=0]
-        %f = call float @test6( float 7.000000e+00 )            ; <float> [#uses=0]
-        ret i32 %a
-}
-
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
deleted file mode 100644
index da36e78..0000000
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; For PR1099
-; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
-
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8"
-        %struct.Connector = type { i16, i16, i8, i8, %struct.Connector*, i8* }
-
-
-define i1 @prune_match_entry_2E_ce(%struct.Connector* %a, i16 %b.0.0.val) {
-newFuncRoot:
-        br label %entry.ce
-
-cond_next.exitStub:             ; preds = %entry.ce
-        ret i1 true
-
-entry.return_crit_edge.exitStub:                ; preds = %entry.ce
-        ret i1 false
-
-entry.ce:               ; preds = %newFuncRoot
-        %tmp1 = getelementptr %struct.Connector* %a, i32 0, i32 0                ; <i16*> [#uses=1]
-        %tmp2 = load i16* %tmp1           ; <i16> [#uses=1]
-        %tmp3 = icmp eq i16 %tmp2, %b.0.0.val             ; <i1> [#uses=1]
-        br i1 %tmp3, label %cond_next.exitStub, label %entry.return_crit_edge.exitStub
-}
-
-
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
deleted file mode 100644
index 4f699b7..0000000
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep __builtin_stack_save
-; RUN: llc < %s -march=c | grep __builtin_stack_restore
-; PR1028
-
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
-
-define i8* @test() {
-    %s = call i8* @llvm.stacksave()
-    call void @llvm.stackrestore(i8* %s)
-    ret i8* %s
-}
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
deleted file mode 100644
index 7d508e4..0000000
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-; PR1181
-target datalayout = "e-p:64:64"
-target triple = "x86_64-apple-darwin8"
-
-
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
-define fastcc void @InitUser_data_unregistered() {
-entry:
-        tail call void @llvm.memset.i64( i8* null, i8 0, i64 65496, i32 1 )
-        ret void
-}
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
deleted file mode 100644
index 7e1ff2a..0000000
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; PR1164
-; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
-
-@G = global i32 123
-@ltmp_0_1 = global i32 123
-
-define i32 @test(i32 *%G) {
-        %A = load i32* %G
-        %B = load i32* @ltmp_0_1
-        %C = add i32 %A, %B
-        ret i32 %C
-}
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
deleted file mode 100644
index c8bfdd6..0000000
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c | grep {packed}
-
-	%struct.p = type <{ i16 }>
-
-define i32 @main() {
-entry:
-        %t = alloca %struct.p, align 2
-	ret i32 5
-}
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
deleted file mode 100644
index 6e0cf68..0000000
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:          grep {struct __attribute__ ((packed, aligned(} | count 4
-
-define void @test(i32* %P) {
-        %X = load i32* %P, align 1
-        store i32 %X, i32* %P, align 1
-        ret void
-}
-
-define void @test2(i32* %P) {
-        %X = volatile load i32* %P, align 2
-        volatile store i32 %X, i32* %P, align 2
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
deleted file mode 100644
index e9fa552..0000000
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
-define i32 @test(i32 %r) {
-  %s = icmp eq i32 %r, 0
-  %t = add i1 %s, %s
-  %u = zext i1 %t to i32
-  br i1 %t, label %A, label %B
-A:
-
-  ret i32 %u
-B:
-
-  %v = select i1 %t, i32 %r, i32 %u
-  ret i32 %v
-}
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
deleted file mode 100644
index b72b573..0000000
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=c
-; PR2907
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.5"
-	%"struct.Point<0>" = type { %"struct.Tensor<1,0>" }
-	%"struct.QGauss2<1>" = type { %"struct.Quadrature<0>" }
-	%"struct.Quadrature<0>" = type { %struct.Subscriptor, i32, %"struct.std::vector<Point<0>,std::allocator<Point<0> > >", %"struct.std::vector<double,std::allocator<double> >" }
-	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
-	%"struct.Tensor<1,0>" = type { [1 x double] }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" = type { %"struct.Point<0>"*, %"struct.Point<0>"*, %"struct.Point<0>"* }
-	%"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
-	%"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
-	%"struct.std::type_info" = type { i32 (...)**, i8* }
-	%"struct.std::vector<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" }
-	%"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
-
-define fastcc void @_ZN6QGaussILi1EEC1Ej(%"struct.QGauss2<1>"* %this, i32 %n) {
-entry:
-	br label %bb4
-
-bb4:		; preds = %bb5.split, %bb4, %entry
-	%0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128))		; <i1> [#uses=1]
-	br i1 %0, label %bb4, label %bb5.split
-
-bb5.split:		; preds = %bb4
-	%1 = getelementptr double* null, i32 0		; <double*> [#uses=0]
-	br label %bb4
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
deleted file mode 100644
index 0ae480d..0000000
--- a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=c
-; Check that uadd and sadd with overflow are handled by C Backend.
-
-%0 = type { i32, i1 }        ; type %0
-
-define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
-
-declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
-
diff --git a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
deleted file mode 100644
index 054a3ca..0000000
--- a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
-; PR2407
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo() {
-  %newcw = alloca i16             ; <i16*> [#uses=2]
-  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"( i16*
-%newcw ) nounwind 
-  ret void
-}
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
deleted file mode 100644
index 44e3a5e..0000000
--- a/test/CodeGen/CBackend/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } {
-    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/CodeGen/CBackend/dg.exp b/test/CodeGen/CBackend/dg.exp
deleted file mode 100644
index 9d78940..0000000
--- a/test/CodeGen/CBackend/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
deleted file mode 100644
index 7dec3d9..0000000
--- a/test/CodeGen/CBackend/fneg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @func() nounwind {
-  entry:
-  %0 = fsub double -0.0, undef
-  ret void
-}
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
deleted file mode 100644
index bf8477b..0000000
--- a/test/CodeGen/CBackend/pr2408.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
-; PR2408
-
-define i32 @a(i32 %a) {
-entry:
-        %shr = ashr i32 %a, 0           ; <i32> [#uses=1]
-        %shr2 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %mul = mul i32 %shr, %shr2              ; <i32> [#uses=1]
-        %shr4 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %div = sdiv i32 %mul, %shr4             ; <i32> [#uses=1]
-        ret i32 %div
-}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
deleted file mode 100644
index b7b7677..0000000
--- a/test/CodeGen/CBackend/vectors.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=c
-@.str15 = external global [2 x i8]
-
-define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
-  %c = insertelement <4 x i32> %a, i32 1, i32 %b
-  
-  ret <4 x i32> %c
-}
-
-define i32 @test2(<4 x i32> %a, i32 %b) {
-  %c = extractelement <4 x i32> %a, i32 1
-  
-  ret i32 %c
-}
-
-define <4 x float> @test3(<4 x float> %Y) {
-	%Z = fadd <4 x float> %Y, %Y
-	%X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
-	ret <4 x float> %X
-}
-
-define void @test4() {
-	%x = alloca <4 x float>
-	%tmp3.i16 = getelementptr <4 x float>* %x, i32 0, i32 0
-	store float 1.0, float* %tmp3.i16
-	ret void
-}
-
-define i32* @test5({i32, i32} * %P) {
-	%x = getelementptr {i32, i32} * %P, i32 0, i32 1
-	ret i32* %x
-}
-
-define i8* @test6() {
-  ret i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0) 
-}
-
diff --git a/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
new file mode 100644
index 0000000..419f594
--- /dev/null
+++ b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=cpp
+declare void @foo(<4 x i32>)
+define void @bar() {
+  call void @foo(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+  ret void
+}
diff --git a/test/CodeGen/CPP/dg.exp b/test/CodeGen/CPP/dg.exp
deleted file mode 100644
index 3276dcc..0000000
--- a/test/CodeGen/CPP/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CppBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
new file mode 100644
index 0000000..4d4b4a4
--- /dev/null
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CppBackend' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index 72478a1..4203e91 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -5,6 +5,9 @@
 ; RUN: grep andhi  %t1.s | count 30
 ; RUN: grep andbi  %t1.s | count 4
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index 559b266..11cf770 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,7 +1,3 @@
-; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
-; RUN: grep brsl    %t1.s | count 1
-; RUN: grep brasl   %t1.s | count 2
-; RUN: grep stqd    %t1.s | count 82
 ; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
deleted file mode 100644
index 141361d..0000000
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
-; RUN: grep bisl    %t1.s | count 7
-; RUN: grep ila     %t1.s | count 1
-; RUN: grep rotqby  %t1.s | count 5
-; RUN: grep lqa     %t1.s | count 1
-; RUN: grep lqd     %t1.s | count 12
-; RUN: grep dispatch_tab %t1.s | count 5
-; RUN: grep bisl    %t2.s | count 7
-; RUN: grep ilhu    %t2.s | count 2
-; RUN: grep iohl    %t2.s | count 2
-; RUN: grep rotqby  %t2.s | count 5
-; RUN: grep lqd     %t2.s | count 13
-; RUN: grep ilhu    %t2.s | count 2
-; RUN: grep ai      %t2.s | count 9
-; RUN: grep dispatch_tab %t2.s | count 6
-
-; ModuleID = 'call_indirect.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
-
-define void @dispatcher(i32 %i_arg, float %f_arg) {
-entry:
-        %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
-        tail call void %tmp2( i32 %i_arg, float %f_arg )
-        %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
-        tail call void %tmp2.1( i32 %i_arg, float %f_arg )
-        %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
-        tail call void %tmp2.2( i32 %i_arg, float %f_arg )
-        %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
-        tail call void %tmp2.3( i32 %i_arg, float %f_arg )
-        %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
-        tail call void %tmp2.4( i32 %i_arg, float %f_arg )
-        %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
-        tail call void %tmp2.5( i32 %i_arg, float %f_arg )
-        ret void
-}
-
-@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4
-@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16
-
-define void @double_indirect_call() {
-        %a = load void ()*** @ptr.a, align 16
-        %b = load void ()** %a, align 4
-        tail call void %b()
-        ret void
-}
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
deleted file mode 100644
index d416479..0000000
--- a/test/CodeGen/CellSPU/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CellSPU] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
new file mode 100644
index 0000000..ea00867
--- /dev/null
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CellSPU' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index b770cad..57ac709 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -3,6 +3,10 @@
 ; RUN: grep and    %t1.s | count 94
 ; RUN: grep xsbh   %t1.s | count 2
 ; RUN: grep xshw   %t1.s | count 4
+
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4f1febb..f329266 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -6,6 +6,9 @@
 ; RUN: grep orbi   %t1.s | count 15
 ; RUN: FileCheck %s < %t1.s
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index b1219e6..9770935 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
 define <2 x float> @test1(<4 x float> %param )
 {
 ; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
   %el = extractelement <4 x float> %param, i32 1
   %vec1 = insertelement <1 x float> undef, float %el, i32 0
   %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index c804256..65e0aa6 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep selb   %t1.s | count 56
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3252c77..f4aad44 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,12 +1,12 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep {shlh	}  %t1.s | count 10
 ; RUN: grep {shlhi	}  %t1.s | count 3
-; RUN: grep {shl	}  %t1.s | count 11
+; RUN: grep {shl	}  %t1.s | count 10
 ; RUN: grep {shli	}  %t1.s | count 3
 ; RUN: grep {xshw	}  %t1.s | count 5
-; RUN: grep {and	}  %t1.s | count 14
-; RUN: grep {andi	}  %t1.s | count 2
-; RUN: grep {rotmi	}  %t1.s | count 2
+; RUN: grep {and	}  %t1.s | count 15
+; RUN: grep {andi	}  %t1.s | count 4
+; RUN: grep {rotmi	}  %t1.s | count 4
 ; RUN: grep {rotqmbyi	}  %t1.s | count 1
 ; RUN: grep {rotqmbii	}  %t1.s | count 2
 ; RUN: grep {rotqmby	}  %t1.s | count 1
@@ -342,3 +342,7 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
 	%rv = ashr <8 x i16> %val, %sh
 	ret <8 x i16> %rv
 }
+
+define <2 x i64> @special_const() {
+  ret <2 x i64> <i64 4294967295, i64 4294967295>
+}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index c88a258..973586b 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,12 +1,14 @@
 ; RUN: llc -O1  --march=cellspu < %s | FileCheck %s
 
+;CHECK: shuffle
 define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
   ; CHECK: cwd {{\$.}}, 0($sp)
   ; CHECK: shufb {{\$., \$4, \$3, \$.}}
   %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
   ret <4 x float> %val
 }
- 
+
+;CHECK: splat
 define <4 x float> @splat(float %param1) {
   ; CHECK: lqa
   ; CHECK: shufb $3
@@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) {
   ret <4 x float> %val  
 }
 
+;CHECK: test_insert
 define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
   %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
 ;CHECK:	lqa	$6,
@@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
   ret void 
 }
 
+;CHECK: test_insert_1
 define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
 ;CHECK: cwd     $5, 4($sp)
 ;CHECK: shufb   $3, $4, $3, $5
@@ -39,6 +43,7 @@ define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
   ret <4 x float> %rv
 }
 
+;CHECK: test_v2i32
 define <2 x i32> @test_v2i32(<4 x i32>%vec)
 {
 ;CHECK: rotqbyi $3, $3, 4
@@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec)
 
 define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 8
-;CHECK: bi $lr
   %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
         <4 x i32> <i32 2,i32 3,i32 0, i32 1>
   ret <4 x i32> %rv
 }
 
+;CHECK: test_v4i32_rot4
 define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
   %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
         <4 x i32> <i32 1,i32 2,i32 3, i32 0>
   ret <4 x i32> %rv
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index adbb5ef..8c32750 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -22,6 +22,9 @@
 ; RUN: grep shufb   %t2.s | count 7
 ; RUN: grep stqd    %t2.s | count 7
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 ; ModuleID = 'struct_1.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
index 71d4aba..9c5b896 100644
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -9,7 +9,8 @@ define %vec @test_ret(%vec %param)
 
 define %vec @test_add(%vec %param)
 {
-;CHECK: a {{\$.}}, $3, $3
+;CHECK: shufb
+;CHECK: addx
   %1 = add %vec %param, %param
 ;CHECK: bi $lr
   ret %vec %1
@@ -17,21 +18,14 @@ define %vec @test_add(%vec %param)
 
 define %vec @test_sub(%vec %param)
 {
-;CHECK: sf {{\$.}}, $4, $3
   %1 = sub %vec %param, <i32 1, i32 1>
-
 ;CHECK: bi $lr
   ret %vec %1
 }
 
 define %vec @test_mul(%vec %param)
 {
-;CHECK: mpyu
-;CHECK: mpyh
-;CHECK: a {{\$., \$., \$.}}
-;CHECK: a {{\$., \$., \$.}}
   %1 = mul %vec %param, %param
-
 ;CHECK: bi $lr
   ret %vec %1
 }
@@ -56,22 +50,12 @@ define i32 @test_extract() {
 
 define void @test_store( %vec %val, %vec* %ptr)
 {
-;CHECK: stqd $3, 0(${{.}})
-;CHECK: bi $lr
   store %vec %val, %vec* %ptr
   ret void
 }
 
-;Alignment of <2 x i32> is not *directly* defined in the ABI
-;It probably is safe to interpret it as an array, thus having 8 byte
-;alignment (according to ABI). This tests that the size of
-;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
-;two arrays
 define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
 {
-; CHECK-NOT:	ai	$3, $3, 16
-; CHECK:	ai	$3, $3, 8
-; CHECK:	bi	$lr
    %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
    ret <2 x i32>* %rv
 }
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 943ed88..d67559e 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -30,8 +30,6 @@ UnifiedUnreachableBlock:		; preds = %entry
 
 declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn 
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 288bfd2..9f10206 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -4,8 +4,8 @@
 
 define i32 @main(i64 %arg) nounwind  {
 entry:
-	%tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg )		; <i64> [#uses=1]
-	%tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg )		; <i64> [#uses=1]
+	%tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg, i1 true )		; <i64> [#uses=1]
+	%tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg, i1 true )		; <i64> [#uses=1]
 	%tmp57 = tail call i64 @llvm.ctpop.i64( i64 %arg )		; <i64> [#uses=1]
 	%tmp38 = trunc i64 %tmp37 to i32		; <i32>:0 [#uses=1]
 	%tmp48 = trunc i64 %tmp47 to i32		; <i32>:0 [#uses=1]
@@ -16,6 +16,6 @@ entry:
 
 declare i32 @printf(i8* noalias , ...) nounwind 
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone 
-declare i64 @llvm.cttz.i64(i64) nounwind readnone 
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone 
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone 
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone 
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
index 3cbf4c5..b483009 100644
--- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -15,8 +15,6 @@
 %"struct.std::locale::facet" = type { i32 (...)**, i32 }
 %union..0._15 = type { i32 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
 declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
deleted file mode 100644
index 4758697..0000000
--- a/test/CodeGen/Generic/bool-vector.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; PR1845
-
-define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
-Body:
-        %castPtr = bitcast <4 x i1>* %boolVectorPtr to <4 x i1>*
-        %someBools = load <4 x i1>* %castPtr, align 1           ; <<4 x i1>>
-        %internal = alloca <4 x i1>, align 16           ; <<4 x i1>*> [#uses=1]
-        store <4 x i1> %someBools, <4 x i1>* %internal, align 1
-        ret void
-}
diff --git a/test/CodeGen/Generic/dbg-declare.ll b/test/CodeGen/Generic/dbg-declare.ll
new file mode 100644
index 0000000..01f7d6d
--- /dev/null
+++ b/test/CodeGen/Generic/dbg-declare.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -O0
+; <rdar://problem/11134152>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @foo(i32* %x) nounwind uwtable ssp {
+entry:
+  %x.addr = alloca i32*, align 8
+  %saved_stack = alloca i8*
+  %cleanup.dest.slot = alloca i32
+  store i32* %x, i32** %x.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
+  %0 = load i32** %x.addr, align 8, !dbg !16
+  %1 = load i32* %0, align 4, !dbg !16
+  %2 = zext i32 %1 to i64, !dbg !16
+  %3 = call i8* @llvm.stacksave(), !dbg !16
+  store i8* %3, i8** %saved_stack, !dbg !16
+  %vla = alloca i8, i64 %2, align 16, !dbg !16
+  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
+  store i32 1, i32* %cleanup.dest.slot
+  %4 = load i8** %saved_stack, !dbg !24
+  call void @llvm.stackrestore(i8* %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 5, i32 21, metadata !5, null}
+!16 = metadata !{i32 7, i32 13, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!23 = metadata !{i32 7, i32 8, metadata !17, null}
+!24 = metadata !{i32 9, i32 1, metadata !17, null}
+!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/Generic/dg.exp b/test/CodeGen/Generic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/CodeGen/Generic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 1db7549..abcdb9b 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -21,19 +21,19 @@ define void @ctpoptest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %C
 	ret void
 }
 
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
 
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
 
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
 
 define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.ctlz.i8( i8 %A )		; <i8> [#uses=1]
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	%c = call i32 @llvm.ctlz.i32( i32 %C )		; <i32> [#uses=1]
-	%d = call i64 @llvm.ctlz.i64( i64 %D )		; <i64> [#uses=1]
+	%a = call i8 @llvm.ctlz.i8( i8 %A, i1 true )		; <i8> [#uses=1]
+	%b = call i16 @llvm.ctlz.i16( i16 %B, i1 true )		; <i16> [#uses=1]
+	%c = call i32 @llvm.ctlz.i32( i32 %C, i1 true )		; <i32> [#uses=1]
+	%d = call i64 @llvm.ctlz.i64( i64 %D, i1 true )		; <i64> [#uses=1]
 	store i8 %a, i8* %AP
 	store i16 %b, i16* %BP
 	store i32 %c, i32* %CP
@@ -41,19 +41,19 @@ define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP
 	ret void
 }
 
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
 
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
 
 define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.cttz.i8( i8 %A )		; <i8> [#uses=1]
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	%c = call i32 @llvm.cttz.i32( i32 %C )		; <i32> [#uses=1]
-	%d = call i64 @llvm.cttz.i64( i64 %D )		; <i64> [#uses=1]
+	%a = call i8 @llvm.cttz.i8( i8 %A, i1 true )		; <i8> [#uses=1]
+	%b = call i16 @llvm.cttz.i16( i16 %B, i1 true )		; <i16> [#uses=1]
+	%c = call i32 @llvm.cttz.i32( i32 %C, i1 true )		; <i32> [#uses=1]
+	%d = call i64 @llvm.cttz.i64( i64 %D, i1 true )		; <i64> [#uses=1]
 	store i8 %a, i8* %AP
 	store i16 %b, i16* %BP
 	store i32 %c, i32* %CP
diff --git a/test/CodeGen/Generic/pr12507.ll b/test/CodeGen/Generic/pr12507.ll
new file mode 100644
index 0000000..c793358
--- /dev/null
+++ b/test/CodeGen/Generic/pr12507.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+@c = external global i32, align 4
+
+define void @foo(i160 %x) {
+entry:
+  %cmp.i = icmp ne i160 %x, 340282366920938463463374607431768211456
+  %conv.i = zext i1 %cmp.i to i32
+  %tobool.i = icmp eq i32 %conv.i, 0
+  br i1 %tobool.i, label %if.then.i, label %fn1.exit
+
+if.then.i:
+  store i32 0, i32* @c, align 4
+  br label %fn1.exit
+
+fn1.exit:
+  ret void
+}
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index 63052c1..77636eb 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -185,3 +185,11 @@ define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
         ret i32 %reg820
 }
 
+; Test case for scalarising a 1 element vselect
+;
+define <1 x i32> @checkScalariseVSELECT(<1 x i32> %a, <1 x i32> %b) {
+        %cond = icmp uge <1 x i32> %a, %b
+        %s = select <1 x i1> %cond, <1 x i32> %a, <1 x i32> %b
+        ret <1 x i32> %s
+}
+
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
new file mode 100644
index 0000000..69002e0
--- /dev/null
+++ b/test/CodeGen/Hexagon/args.ll
@@ -0,0 +1,19 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = #7
+; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: r0 = #1
+; CHECK: r1 = #2
+; CHECK: r2 = #3
+; CHECK: r3 = #4
+; CHECK: r4 = #5
+; CHECK: r5 = #6
+
+
+define void @foo() nounwind {
+entry:
+  call void @bar(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+  ret void
+}
+
+declare void @bar(i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
new file mode 100644
index 0000000..36abd74
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -0,0 +1,18 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i64
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i64* @k, align 8
+  %conv = trunc i64 %1 to i32
+  %2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
+  store i64 %2, i64* @k, align 8
+  ret void
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
new file mode 100644
index 0000000..04c2ec1
--- /dev/null
+++ b/test/CodeGen/Hexagon/double.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_adddf3
+; CHECK: __hexagon_subdf3
+
+define void @foo(double* %acc, double %num, double %num2) nounwind {
+entry:
+  %acc.addr = alloca double*, align 4
+  %num.addr = alloca double, align 8
+  %num2.addr = alloca double, align 8
+  store double* %acc, double** %acc.addr, align 4
+  store double %num, double* %num.addr, align 8
+  store double %num2, double* %num2.addr, align 8
+  %0 = load double** %acc.addr, align 4
+  %1 = load double* %0
+  %2 = load double* %num.addr, align 8
+  %add = fadd double %1, %2
+  %3 = load double* %num2.addr, align 8
+  %sub = fsub double %add, %3
+  %4 = load double** %acc.addr, align 4
+  store double %sub, double* %4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
new file mode 100644
index 0000000..51acf2e
--- /dev/null
+++ b/test/CodeGen/Hexagon/float.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+  %acc.addr = alloca float*, align 4
+  %num.addr = alloca float, align 4
+  %num2.addr = alloca float, align 4
+  store float* %acc, float** %acc.addr, align 4
+  store float %num, float* %num.addr, align 4
+  store float %num2, float* %num2.addr, align 4
+  %0 = load float** %acc.addr, align 4
+  %1 = load float* %0
+  %2 = load float* %num.addr, align 4
+  %add = fadd float %1, %2
+  %3 = load float* %num2.addr, align 4
+  %sub = fsub float %add, %3
+  %4 = load float** %acc.addr, align 4
+  store float %sub, float* %4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
new file mode 100644
index 0000000..c0a9fda
--- /dev/null
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -0,0 +1,24 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@num2 = external global i32
+
+; CHECK: allocframe
+; CHECK: dealloc_return
+
+define i32 @foo() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i32* @num, align 4
+  store i32 %0, i32* %i, align 4
+  %1 = load i32* %i, align 4
+  %2 = load i32* @acc, align 4
+  %mul = mul nsw i32 %1, %2
+  %3 = load i32* @num2, align 4
+  %add = add nsw i32 %mul, %3
+  store i32 %add, i32* %i, align 4
+  %4 = load i32* %i, align 4
+  ret i32 %4
+}
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
new file mode 100644
index 0000000..24324b2
--- /dev/null
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Hexagon' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
new file mode 100644
index 0000000..afd6fc6
--- /dev/null
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -0,0 +1,20 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: += mpyi
+
+define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
+entry:
+  %acc.addr = alloca i32, align 4
+  %num.addr = alloca i32, align 4
+  %num2.addr = alloca i32, align 4
+  store i32 %acc, i32* %acc.addr, align 4
+  store i32 %num, i32* %num.addr, align 4
+  store i32 %num2, i32* %num2.addr, align 4
+  %0 = load i32* %num.addr, align 4
+  %1 = load i32* %acc.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %2 = load i32* %num2.addr, align 4
+  %add = add nsw i32 %mul, %2
+  store i32 %add, i32* %num.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
new file mode 100644
index 0000000..c63a3ba
--- /dev/null
+++ b/test/CodeGen/Hexagon/static.ll
@@ -0,0 +1,21 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@val = external global i32
+
+; CHECK: CONST32(#num)
+; CHECK: CONST32(#acc)
+; CHECK: CONST32(#val)
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @num, align 4
+  %1 = load i32* @acc, align 4
+  %mul = mul nsw i32 %0, %1
+  %2 = load i32* @val, align 4
+  %add = add nsw i32 %mul, %2
+  store i32 %add, i32* @num, align 4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
new file mode 100644
index 0000000..2c962d0
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -0,0 +1,16 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+
+%struct.small = type { i32, i32 }
+
+@s1 = common global %struct.small zeroinitializer, align 4
+
+define void @foo() nounwind {
+entry:
+  %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+  call void @bar(i64 %0)
+  ret void
+}
+
+declare void @bar(i64)
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
new file mode 100644
index 0000000..69de4f6
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
+; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
+; CHECK: memw(r29 + #0) = r[[T1]]
+
+%struct.large = type { i64, i64 }
+
+@s2 = common global %struct.large zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+  call void @bar(%struct.large* byval @s2)
+  ret void
+}
+
+declare void @bar(%struct.large* byval)
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
new file mode 100644
index 0000000..788e474
--- /dev/null
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i32
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
+  store i32 %2, i32* @k, align 4
+  ret void
+}
+
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
index b1eb22a..827fd32 100644
--- a/test/CodeGen/MBlaze/cc.ll
+++ b/test/CodeGen/MBlaze/cc.ll
@@ -222,8 +222,8 @@ define void @testing() {
 
     %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8)
-    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
@@ -235,9 +235,9 @@ define void @testing() {
 
     %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8, i32 9)
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{swi? .*, r1, 32}}
     ; CHECK:        {{swi? .*, r1, 36}}
+    ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
@@ -249,10 +249,10 @@ define void @testing() {
 
     %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i32 8, i32 9, i32 10)
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{swi? .*, r1, 32}}
-    ; CHECK:        {{swi? .*, r1, 36}}
     ; CHECK:        {{swi? .*, r1, 40}}
+    ; CHECK:        {{swi? .*, r1, 36}}
+    ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp
deleted file mode 100644
index bfd5e47..0000000
--- a/test/CodeGen/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll
index fae9830..621784a 100644
--- a/test/CodeGen/MBlaze/div.ll
+++ b/test/CodeGen/MBlaze/div.ll
@@ -13,14 +13,14 @@ define i8 @test_i8(i8 %a, i8 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i8 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i8 %tmp.1, %tmp.2
     ret i8 %tmp.3
@@ -36,14 +36,14 @@ define i16 @test_i16(i16 %a, i16 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i16 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i16 %tmp.1, %tmp.2
     ret i16 %tmp.3
@@ -59,14 +59,14 @@ define i32 @test_i32(i32 %a, i32 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i32 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i32 %tmp.1, %tmp.2
     ret i32 %tmp.3
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..e236200
--- /dev/null
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index f339373..4c7d2d0 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,9 +7,9 @@ target triple = "msp430-unknown-linux-gnu"
 
 define void @uip_arp_arpin() nounwind {
 entry:
-	%tmp = volatile load i16* @uip_len		; <i16> [#uses=1]
+	%tmp = load volatile i16* @uip_len		; <i16> [#uses=1]
 	%cmp = icmp ult i16 %tmp, 42		; <i1> [#uses=1]
-	volatile store i16 0, i16* @uip_len
+	store volatile i16 0, i16* @uip_len
 	br i1 %cmp, label %if.then, label %if.end
 
 if.then:		; preds = %entry
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index 088d3e1..e8c0d14a 100644
--- a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -6,8 +6,8 @@ target triple = "msp430-generic-generic"
 define i16 @foo() nounwind readnone {
 entry:
   %result = alloca i16, align 1                   ; <i16*> [#uses=2]
-  volatile store i16 0, i16* %result
-  %tmp = volatile load i16* %result               ; <i16> [#uses=1]
+  store volatile i16 0, i16* %result
+  %tmp = load volatile i16* %result               ; <i16> [#uses=1]
   ret i16 %tmp
 }
 
@@ -22,8 +22,8 @@ while.cond:                                       ; preds = %while.cond, %entry
 
 while.end:                                        ; preds = %while.cond
   %result.i = alloca i16, align 1                 ; <i16*> [#uses=2]
-  volatile store i16 0, i16* %result.i
-  %tmp.i = volatile load i16* %result.i           ; <i16> [#uses=0]
+  store volatile i16 0, i16* %result.i
+  %tmp.i = load volatile i16* %result.i           ; <i16> [#uses=0]
   ret i16 0
 }
 
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 4d7d9b9..9fab482 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,10 +11,10 @@ entry:
   %x.addr = alloca i8                             ; <i8*> [#uses=2]
   %tmp = alloca i8, align 1                       ; <i8*> [#uses=2]
   store i8 %x, i8* %x.addr
-  %tmp1 = volatile load i8* @"\010x0021"          ; <i8> [#uses=1]
+  %tmp1 = load volatile i8* @"\010x0021"          ; <i8> [#uses=1]
   store i8 %tmp1, i8* %tmp
   %tmp2 = load i8* %x.addr                        ; <i8> [#uses=1]
-  volatile store i8 %tmp2, i8* @"\010x0021"
+  store volatile i8 %tmp2, i8* @"\010x0021"
   %tmp3 = load i8* %tmp                           ; <i8> [#uses=1]
   store i8 %tmp3, i8* %retval
   %0 = load i8* %retval                           ; <i8> [#uses=1]
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index 856eb9d..c1a186a 100644
--- a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,9 +4,9 @@ define void @foo() nounwind {
 entry:
 	%r = alloca i8		; <i8*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	volatile load i8* %r, align 1		; <i8>:0 [#uses=1]
+	load volatile i8* %r, align 1		; <i8>:0 [#uses=1]
 	or i8 %0, 1		; <i8>:1 [#uses=1]
-	volatile store i8 %1, i8* %r, align 1
+	store volatile i8 %1, i8* %r, align 1
 	br label %return
 
 return:		; preds = %entry
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 4f9a724..c7ecb5a 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -32,7 +32,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
 ; CHECK:		bis.b	bar(r14), r15
 
 define i16 @am4(i16 %x) nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 17ebd87..727c29f 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -35,9 +35,9 @@ define void @am3(i16 %i, i8 %x) nounwind {
 ; CHECK:		bis.b	r14, bar(r15)
 
 define void @am4(i16 %x) nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %x, %1
-	volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+	store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 6676b88..7cd345b 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -29,7 +29,7 @@ define i8 @am3(i16 %n) nounwind {
 ; CHECK:		mov.b	bar(r15), r15
 
 define i16 @am4() nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	ret i16 %1
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index 4b327b0..5eeb02f 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -29,7 +29,7 @@ define void @am3(i16 %i, i8 %a) nounwind {
 ; CHECK:		mov.b	r14, bar(r15)
 
 define void @am4(i16 %a) nounwind {
-	volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+	store volatile i16 %a, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
index 2337c2c..d4ae811 100644
--- a/test/CodeGen/MSP430/Inst16mm.ll
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -64,6 +64,6 @@ entry:
  %0 = load i16* %retval                          ; <i16> [#uses=1]
  ret i16 %0
 ; CHECK: mov2:
-; CHECK:	mov.w	0(r1), 4(r1)
 ; CHECK:	mov.w	2(r1), 6(r1)
+; CHECK:	mov.w	0(r1), 4(r1)
 }
diff --git a/test/CodeGen/MSP430/dg.exp b/test/CodeGen/MSP430/dg.exp
deleted file mode 100644
index e4ea13a..0000000
--- a/test/CodeGen/MSP430/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MSP430] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
index 93cfb25..dc2abf5 100644
--- a/test/CodeGen/MSP430/indirectbr2.ll
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -5,7 +5,7 @@ define internal i16 @foo(i16 %i) nounwind {
 entry:
   %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
   %gotovar.4.0 = load i8** %tmp1, align 4        ; <i8*> [#uses=1]
-; CHECK: mov.w   .LC.0.2070(r15), pc
+; CHECK: mov.w   .LC.0.2070(r12), pc
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 L5:                                               ; preds = %bb2
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
new file mode 100644
index 0000000..972732e
--- /dev/null
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MSP430' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9d8e391..c61e1cd 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,19 +1,22 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep subu %t | count 2
-; RUN: grep addu %t | count 4
-
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i64 @add64(i64 %u, i64 %v) nounwind  {
 entry:
-	%tmp2 = add i64 %u, %v	
+; CHECK: addu
+; CHECK: sltu 
+; CHECK: addu
+; CHECK: addu
+  %tmp2 = add i64 %u, %v  
   ret i64 %tmp2
 }
 
 define i64 @sub64(i64 %u, i64 %v) nounwind  {
 entry:
+; CHECK: sub64
+; CHECK: subu
+; CHECK: sltu 
+; CHECK: addu
+; CHECK: subu
   %tmp2 = sub i64 %u, %v
   ret i64 %tmp2
 }
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index b1d20d9..afec7f6 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,17 +1,18 @@
-; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc -march=mips < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
-	%struct.sret0 = type { i32, i32, i32 }
+%struct.sret0 = type { i32, i32, i32 }
 
 define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind {
 entry:
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 0		; <i32*>:0 [#uses=1]
-	store i32 %dummy, i32* %0, align 4
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 1		; <i32*>:1 [#uses=1]
-	store i32 %dummy, i32* %1, align 4
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 2		; <i32*>:2 [#uses=1]
-	store i32 %dummy, i32* %2, align 4
-	ret void
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 0    ; <i32*>:0 [#uses=1]
+  store i32 %dummy, i32* %0, align 4
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 1    ; <i32*>:1 [#uses=1]
+  store i32 %dummy, i32* %1, align 4
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 2    ; <i32*>:2 [#uses=1]
+  store i32 %dummy, i32* %2, align 4
+  ret void
 }
 
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index d804c7d..4c55236 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,16 +1,17 @@
-; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @fptoint(float %a) nounwind {
 entry:
-	fptosi float %a to i32		; <i32>:0 [#uses=1]
-	ret i32 %0
+; CHECK: trunc.w.s 
+  fptosi float %a to i32		; <i32>:0 [#uses=1]
+  ret i32 %0
 }
 
 define i32 @fptouint(float %a) nounwind {
 entry:
-	fptoui float %a to i32		; <i32>:0 [#uses=1]
-	ret i32 %0
+; CHECK: fptouint
+; CHECK: trunc.w.s 
+; CHECK: trunc.w.s 
+  fptoui float %a to i32		; <i32>:0 [#uses=1]
+  ret i32 %0
 }
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index e0c745f..8479ad2 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,20 +1,16 @@
-; DISABLED: llc < %s -march=mips -o %t
-; DISABLED: grep seh %t | count 1
-; DISABLED: grep seb %t | count 1
-; RUN: false
-; XFAIL: *
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
+; CHECK: seb
 	add i8 %sum, %e.0		; <i8>:0 [#uses=1]
 	ret i8 %0
 }
 
 define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
 entry:
+; CHECK: seh
 	add i16 %sum, %e.0		; <i16>:0 [#uses=1]
 	ret i16 %0
 }
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 94dfe35..a8e5470 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
-; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define float @F(float %a) nounwind {
+; CHECK: .rodata.cst4,"aM",@progbits 
 entry:
-	fadd float %a, 0x4011333340000000		; <float>:0 [#uses=1]
-	fadd float %0, 0x4010666660000000		; <float>:1 [#uses=1]
-	ret float %1
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0,1]}}) 
+; CHECK: ($CPI0_{{[0,1]}}) 
+; CHECK: ($CPI0_{{[0,1]}}) 
+  fadd float %a, 0x4011333340000000		; <float>:0 [#uses=1]
+  fadd float %0, 0x4010666660000000		; <float>:1 [#uses=1]
+  ret float %1
 }
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index 23ed64a..dbde742 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,17 +1,53 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mfhi  %t | count 1
-; RUN: grep mflo  %t | count 1
-; RUN: grep multu %t | count 1
+; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
-	%struct.DWstruct = type { i32, i32 }
+%struct.DWstruct = type { i32, i32 }
 
 define i32 @A0(i32 %u, i32 %v) nounwind  {
 entry:
-	%asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
-	%asmresult = extractvalue %struct.DWstruct %asmtmp, 0
-	%asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1		; <i32> [#uses=1]
+; CHECK: multu 
+; CHECK: mflo
+; CHECK: mfhi
+  %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
+  %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
+  %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1    ; <i32> [#uses=1]
   %res = add i32 %asmresult, %asmresult1
-	ret i32 %res
+  ret i32 %res
 }
+
+@gi2 = external global i32
+@gi1 = external global i32
+@gi0 = external global i32
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define void @foo0() nounwind {
+entry:
+; CHECK: addu
+  %0 = load i32* @gi1, align 4
+  %1 = load i32* @gi0, align 4
+  %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
+  store i32 %2, i32* @gi2, align 4
+  ret void
+}
+
+define void @foo2() nounwind {
+entry:
+; CHECK: neg.s
+  %0 = load float* @gf1, align 4
+  %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
+  store float %1, float* @gf0, align 4
+  ret void
+}
+
+define void @foo3() nounwind {
+entry:
+; CHECK: neg.d
+  %0 = load double* @gd1, align 8
+  %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
+  store double %1, double* @gd0, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f8eb028..78a49ff 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,18 +1,15 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mtc1 %t | count 1
-; RUN: grep mfc1 %t | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define float @A(i32 %u) nounwind  {
 entry:
-	bitcast i32 %u to float
-	ret float %0
+; CHECK: mtc1 
+  bitcast i32 %u to float
+  ret float %0
 }
 
 define i32 @B(float %u) nounwind  {
 entry:
-	bitcast float %u to i32
-	ret i32 %0
+; CHECK: mfc1 
+  bitcast float %u to i32
+  ret i32 %0
 }
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 6dd4af1..0d94b19 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
-; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @twoalloca(i32 %size) nounwind {
 entry:
-	alloca i8, i32 %size		; <i8*>:0 [#uses=1]
-	alloca i8, i32 %size		; <i8*>:1 [#uses=1]
-	call i32 @foo( i8* %0 ) nounwind		; <i32>:2 [#uses=1]
-	call i32 @foo( i8* %1 ) nounwind		; <i32>:3 [#uses=1]
-	add i32 %3, %2		; <i32>:4 [#uses=1]
-	ret i32 %4
+; CHECK: subu ${{[0-9]+}}, $sp
+; CHECK: subu ${{[0-9]+}}, $sp
+  alloca i8, i32 %size    ; <i8*>:0 [#uses=1]
+  alloca i8, i32 %size    ; <i8*>:1 [#uses=1]
+  call i32 @foo( i8* %0 ) nounwind    ; <i32>:2 [#uses=1]
+  call i32 @foo( i8* %1 ) nounwind    ; <i32>:3 [#uses=1]
+  add i32 %3, %2    ; <i32>:4 [#uses=1]
+  ret i32 %4
 }
 
 declare i32 @foo(i8*)
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index fb33323..abd61de 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=mips | grep clz | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @A0(i32 %u) nounwind  {
 entry:
-	call i32 @llvm.ctlz.i32( i32 %u )
+; CHECK: clz 
+  call i32 @llvm.ctlz.i32( i32 %u, i1 true )
   ret i32 %0
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone 
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone 
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index f518843..9c4838a 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s
+; RUN: llc -march=mips -soft-float < %s
 ; PR2667
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "psp"
 	%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
 	%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*, i8*, i32)*, i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)*, i32 (i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._reent*, i32 }
 	%struct.__sbuf = type { i8*, i32 }
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index b8d6826..2b2ee0f 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,10 +1,23 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
-target triple = "mips-unknown-linux"
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 define float @h() nounwind readnone {
 entry:
-; CHECK: lw $2, %got($CPI0_0)($gp)
-; CHECK: lwc1 $f0, %lo($CPI0_0)($2)
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($CPI0_0)
+; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; STATIC-O32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
   ret float 0x400B333340000000
 }
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 07fc10c..aaf6767 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -1,13 +1,25 @@
-; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32 
+; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck %s -check-prefix=PIC-O32 
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64
 
 define i32 @main() nounwind readnone {
 entry:
   %x = alloca i32, align 4                        ; <i32*> [#uses=2]
-  volatile store i32 2, i32* %x, align 4
-  %0 = volatile load i32* %x, align 4             ; <i32> [#uses=1]
-; CHECK: lui $3, %hi($JTI0_0)
-; CHECK: sll $2, $2, 2
-; CHECK: addiu $3, $3, %lo($JTI0_0)
+  store volatile i32 2, i32* %x, align 4
+  %0 = load volatile i32* %x, align 4             ; <i32> [#uses=1]
+; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-O32: jr  $[[R1]]
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
+; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-N64: jr  $[[R1]]
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
@@ -18,7 +30,7 @@ entry:
 bb1:                                              ; preds = %entry
   ret i32 2
 
-; CHECK: $BB0_2
+; CHECK: STATIC-O32: $BB0_2
 bb2:                                              ; preds = %entry
   ret i32 0
 
@@ -31,3 +43,23 @@ bb4:                                              ; preds = %entry
 bb5:                                              ; preds = %entry
   ret i32 1
 }
+
+; STATIC-O32: .align  2
+; STATIC-O32: $JTI0_0:
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; PIC-O32: .align  2
+; PIC-O32: $JTI0_0:
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword 
+; PIC-O32: .gpword 
+; PIC-N64: .align  3
+; PIC-N64: $JTI0_0:
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword 
+; PIC-N64: .gpdword 
+
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
index c592b31..6174500 100644
--- a/test/CodeGen/Mips/2010-11-09-CountLeading.ll
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -3,16 +3,16 @@
 ; CHECK: clz $2, $4
 define i32 @t1(i32 %X) nounwind readnone {
 entry:
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
   ret i32 %tmp1
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 
 ; CHECK: clz $2, $4
 define i32 @t2(i32 %X) nounwind readnone {
 entry:
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
   ret i32 %tmp1
 }
 
@@ -20,7 +20,7 @@ entry:
 define i32 @t3(i32 %X) nounwind readnone {
 entry:
   %neg = xor i32 %X, -1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
   ret i32 %tmp1
 }
 
@@ -28,6 +28,6 @@ entry:
 define i32 @t4(i32 %X) nounwind readnone {
 entry:
   %neg = xor i32 %X, -1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
   ret i32 %tmp1
 }
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index 6de6b77..7de7fa6 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 @reg = common global i8* null, align 4
 
@@ -8,14 +12,30 @@ entry:
   ret i8* %x
 }
 
-; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
-; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
-; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
-; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; PIC-O32: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; STATIC-O32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-O32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N32: lw  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-N32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N64: ld  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N64: ld  $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
+; STATIC-N64: ld  $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
new file mode 100644
index 0000000..1b5513a
--- /dev/null
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+
+define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
+entry:
+  br label %bosco
+
+bosco:                                            ; preds = %bosco, %entry
+  br label %bosco
+}
+
+; CHECK-PIC: b	$BB0_1
+; CHECK-STATIC: j	$BB0_1
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
new file mode 100644
index 0000000..a8fc2cd
--- /dev/null
+++ b/test/CodeGen/Mips/bswap.ll
@@ -0,0 +1,25 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+define i32 @bswap32(i32 %x) nounwind readnone {
+entry:
+; MIPS32: bswap32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+  %or.3 = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %or.3
+}
+
+define i64 @bswap64(i64 %x) nounwind readnone {
+entry:
+; MIPS64: bswap64:
+; MIPS64: dsbh $[[R0:[0-9]+]]
+; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+  %or.7 = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %or.7
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 7851ba9..03254a9 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,11 +1,14 @@
-; RUN: llc -march=mips < %s | FileCheck %s
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; CHECK:  addiu ${{[0-9]+}}, $gp, %got(i1)
-; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
+; O32:  lw  ${{[0-9]+}}, %got(i3)($gp)
+; O32:  addiu ${{[0-9]+}}, $gp, %got(i1)
+; N64:  ld  ${{[0-9]+}}, %got_disp(i3)($gp)
+; N64:  daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -17,10 +20,14 @@ entry:
 @c = global i32 1, align 4
 @d = global i32 0, align 4
 
-; CHECK: cmov2:
-; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
-; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; CHECK: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; O32: cmov2:
+; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; N64: cmov2:
+; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
 define i32 @cmov2(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index 391f5c7..57d022f 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,11 +1,9 @@
-; DISABLED: llc -march=mipsel < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel < %s | FileCheck %s
 
 ; CHECK: .set macro
+; CHECK: .set at
 ; CHECK-NEXT: .cprestore
+; CHECK: .set noat
 ; CHECK-NEXT: .set nomacro
 
 %struct.S = type { [16384 x i32] }
diff --git a/test/CodeGen/Mips/dg.exp b/test/CodeGen/Mips/dg.exp
deleted file mode 100644
index adb2cac..0000000
--- a/test/CodeGen/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 9cd3413..c3facdb 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -10,15 +10,11 @@ entry:
 ; CHECK-EL:  .cfi_def_cfa_offset
 ; CHECK-EL:  sdc1 $f20
 ; CHECK-EL:  sw  $ra
-; CHECK-EL:  sw  $17
-; CHECK-EL:  sw  $16
 ; CHECK-EL:  .cfi_offset 52, -8
 ; CHECK-EL:  .cfi_offset 53, -4
 ; CHECK-EB:  .cfi_offset 53, -8
 ; CHECK-EB:  .cfi_offset 52, -4
 ; CHECK-EL:  .cfi_offset 31, -12
-; CHECK-EL:  .cfi_offset 17, -16
-; CHECK-EL:  .cfi_offset 16, -20
 ; CHECK-EL:  .cprestore 
 
   %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
@@ -58,16 +54,10 @@ unreachable:                                      ; preds = %entry
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
-declare void @llvm.eh.resume(i8*, i32)
-
 declare void @__cxa_throw(i8*, i8*, i8*)
 
 declare i8* @__cxa_begin_catch(i8*)
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index 69f53e5..a164f70 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
 
 define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
new file mode 100644
index 0000000..b296ab3
--- /dev/null
+++ b/test/CodeGen/Mips/fabs.ll
@@ -0,0 +1,52 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+
+define float @foo0(float %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f0
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
+; NO-NAN: abs.s
+
+  %call = tail call float @fabsf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @fabsf(float) nounwind readnone
+
+define double @foo1(double %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f1
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu  $[[T0:[0-9]+]], $zero, 1
+; 64: dsll    $[[T1:[0-9]+]], ${{[0-9]+}}, 63
+; 64: daddiu  $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and     $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dmtc1   $[[AND]], $f0
+
+; 64R2: dins  $[[INS:[0-9]+]], $zero, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+; NO-NAN: abs.d
+
+  %call = tail call double @fabs(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @fabs(double) nounwind readnone
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
new file mode 100644
index 0000000..b36473d
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -0,0 +1,50 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+
+declare double @copysign(double, double) nounwind readnone
+
+declare float @copysignf(float, float) nounwind readnone
+
+define float @func2(float %d, double %f) nounwind readnone {
+entry:
+; 64: func2
+; 64: lui  $[[T0:[0-9]+]], 32767
+; 64: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
+; 64: sll  $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
+; 64: or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
+; 64: mtc1 $[[OR]], $f0
+
+; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
+; 64R2: ins  $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 64R2: mtc1 $[[INS]], $f0
+
+  %add = fadd float %d, 1.000000e+00
+  %conv = fptrunc double %f to float
+  %call = tail call float @copysignf(float %add, float %conv) nounwind readnone
+  ret float %call
+}
+
+define double @func3(double %d, float %f) nounwind readnone {
+entry:
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[T1:[0-9]+]], $[[T0]], 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: srl    ${{[0-9]+}}, ${{[0-9]+}}, 31
+; 64: dsll   $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: ext   ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
+; 64R2: dins  $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+  %add = fadd double %d, 1.000000e+00
+  %conv = fpext float %f to double
+  %call = tail call double @copysign(double %add, double %conv) nounwind readnone
+  ret double %call
+}
+
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 79f956d..1c57eca 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,34 +1,35 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc  < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
-; CHECK-EL: func0:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; CHECK-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EL: mtc1 $[[LO0]], $f0
-; CHECK-EL: mtc1 $[[OR]], $f1
 ;
-; CHECK-EB: lui $[[T0:[0-9]+]], 32767
-; CHECK-EB: lui $[[T1:[0-9]+]], 32768
-; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; CHECK-EB: mtc1 $[[OR]], $f0
-; CHECK-EB: mtc1 $[[LO0]], $f1
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f1
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
+; 64: and    $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 64: daddiu $[[MSK0:[0-9]+]], $[[MSK1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: dext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dins  $[[INS:[0-9]+]], $[[EXT]], 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
   %call = tail call double @copysign(double %d0, double %d1) nounwind readnone
   ret double %call
 }
@@ -37,19 +38,22 @@ declare double @copysign(double, double) nounwind readnone
 
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
-; CHECK-EL: func1:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; CHECK-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; CHECK-EL: mtc1 $[[T4]], $f0
+
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f0
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
   %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
   ret float %call
 }
 
 declare float @copysignf(float, float) nounwind readnone
+
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
new file mode 100644
index 0000000..435b419
--- /dev/null
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+
+define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %add1 = fadd float %add, 0.000000e+00
+  ret float %add1
+}
+
+define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %add = fadd float %sub, 0.000000e+00
+  ret float %add
+}
+
+define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmadd.s 
+; 64R2: nmadd.s 
+; 32R2NAN: madd.s 
+; 64R2NAN: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %sub = fsub float 0.000000e+00, %add
+  ret float %sub
+}
+
+define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmsub.s 
+; 64R2: nmsub.s 
+; 32R2NAN: msub.s 
+; 64R2NAN: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %sub1 = fsub float 0.000000e+00, %sub
+  ret float %sub1
+}
+
+define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: madd.d
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %add1 = fadd double %add, 0.000000e+00
+  ret double %add1
+}
+
+define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: msub.d
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %add = fadd double %sub, 0.000000e+00
+  ret double %add
+}
+
+define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmadd.d 
+; 64R2: nmadd.d 
+; 32R2NAN: madd.d 
+; 64R2NAN: madd.d 
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %sub = fsub double 0.000000e+00, %add
+  ret double %sub
+}
+
+define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmsub.d 
+; 64R2: nmsub.d 
+; 32R2NAN: msub.d 
+; 64R2NAN: msub.d 
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %sub1 = fsub double 0.000000e+00, %sub
+  ret double %sub1
+}
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
new file mode 100644
index 0000000..b322abd
--- /dev/null
+++ b/test/CodeGen/Mips/fneg.ll
@@ -0,0 +1,17 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s 
+
+define float @foo0(i32 %a, float %d) nounwind readnone {
+entry:
+; CHECK-NOT: neg.s
+  %sub = fsub float -0.000000e+00, %d
+  ret float %sub
+}
+
+define double @foo1(i32 %a, double %d) nounwind readnone {
+entry:
+; CHECK:     foo1
+; CHECK-NOT: neg.d
+; CHECK:     jr
+  %sub = fsub double -0.000000e+00, %d
+  ret double %sub
+}
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 0000000..08bd6e7
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load float* %arrayidx1, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  store float %0, float* %arrayidx1, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load double* %arrayidx1, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  store double %0, double* %arrayidx1, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll
deleted file mode 100644
index 86545e3..0000000
--- a/test/CodeGen/Mips/fpcmp.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS32
-
-@g1 = external global i32
-
-define i32 @f(float %f0, float %f1) nounwind {
-entry:
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
-  %cmp = fcmp olt float %f0, %f1
-  %conv = zext i1 %cmp to i32
-  %tmp2 = load i32* @g1, align 4
-  %add = add nsw i32 %tmp2, %conv
-  store i32 %add, i32* @g1, align 4
-  %cond = select i1 %cmp, i32 10, i32 20
-  ret i32 %cond
-}
diff --git a/test/CodeGen/Mips/frem.ll b/test/CodeGen/Mips/frem.ll
new file mode 100644
index 0000000..be222b2
--- /dev/null
+++ b/test/CodeGen/Mips/frem.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=mipsel 
+
+define float @fmods(float %x, float %y) {
+entry:
+  %r = frem float %x, %y
+  ret float %r
+}
+
+define double @fmodd(double %x, double %y) {
+entry:
+  %r = frem double %x, %y
+  ret double %r
+}
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
new file mode 100644
index 0000000..0d49a74
--- /dev/null
+++ b/test/CodeGen/Mips/global-address.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static  -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+
+@s1 = internal unnamed_addr global i32 8, align 4
+@g1 = external global i32
+
+define void @foo() nounwind {
+entry:
+; PIC-O32: lw  $[[R0:[0-9]+]], %got(s1)
+; PIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R0]])
+; PIC-O32: lw  ${{[0-9]+}}, %got(g1)
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-O32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N32: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N32: lw  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N64: ld  $[[R1:[0-9]+]], %got_page(s1)
+; STATIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
+; STATIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+
+  %0 = load i32* @s1, align 4
+  tail call void @foo1(i32 %0) nounwind
+  %1 = load i32* @g1, align 4
+  store i32 %1, i32* @s1, align 4
+  %add = add nsw i32 %1, 2
+  store i32 %add, i32* @g1, align 4
+  ret void
+}
+
+declare void @foo1(i32)
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
new file mode 100644
index 0000000..174d1f9
--- /dev/null
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+
+define void @foo1() nounwind {
+entry:
+; CHECK-NOT:    .cpload
+; CHECK-NOT:    .cprestore
+; CHECK: lui    $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu  $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
+; CHECK: addu   $[[GP:[0-9]+]], $[[R1]], $25
+; CHECK: lw     ${{[0-9]+}}, %call16(foo2)($[[GP]])
+
+  tail call void @foo2(i32* @g0) nounwind
+  tail call void @foo2(i32* @g1) nounwind
+  tail call void @foo2(i32* @g2) nounwind
+  ret void
+}
+
+declare void @foo2(i32*)
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 87cf2a6..8b1f71b 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -4,21 +4,21 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
 entry:
 ; CHECK: addu $[[R1:[0-9]+]], $zero, $5
 ; CHECK: addu $[[R0:[0-9]+]], $zero, $4
-; CHECK: lw  $25, %call16(ff1)
 ; CHECK: ori $6, ${{[0-9]+}}, 3855
 ; CHECK: ori $7, ${{[0-9]+}}, 22136
+; CHECK: lw  $25, %call16(ff1)
 ; CHECK: jalr
   tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
 ; CHECK: lw $25, %call16(ff2)
-; CHECK: lw $[[R2:[0-9]+]], 88($sp)
-; CHECK: lw $[[R3:[0-9]+]], 92($sp)
+; CHECK: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 84($sp)
 ; CHECK: addu $4, $zero, $[[R2]]
 ; CHECK: addu $5, $zero, $[[R3]]
 ; CHECK: jalr $25
   tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
   %sub = add nsw i32 %i, -1
-; CHECK: sw $[[R0]], 24($sp)
 ; CHECK: sw $[[R1]], 28($sp)
+; CHECK: sw $[[R0]], 24($sp)
 ; CHECK: lw $25, %call16(ff3)
 ; CHECK: addu $6, $zero, $[[R2]]
 ; CHECK: addu $7, $zero, $[[R3]]
diff --git a/test/CodeGen/Mips/imm.ll b/test/CodeGen/Mips/imm.ll
new file mode 100644
index 0000000..eea391e
--- /dev/null
+++ b/test/CodeGen/Mips/imm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @foo0() nounwind readnone {
+entry:
+; CHECK: foo0
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+  ret i32 305419896
+}
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i32 @foo2() nounwind readnone {
+entry:
+; CHECK: foo2
+; CHECK: addiu ${{[0-9]+}}, $zero, 4660
+  ret i32 4660
+}
+
+define i32 @foo17() nounwind readnone {
+entry:
+; CHECK: foo17
+; CHECK: addiu ${{[0-9]+}}, $zero, -32204
+  ret i32 -32204
+}
+
+define i32 @foo18() nounwind readnone {
+entry:
+; CHECK: foo18
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i32 33332
+}
diff --git a/test/CodeGen/Mips/indirectcall.ll b/test/CodeGen/Mips/indirectcall.ll
new file mode 100644
index 0000000..ac565d6
--- /dev/null
+++ b/test/CodeGen/Mips/indirectcall.ll
@@ -0,0 +1,8 @@
+; RUN: llc  < %s -march=mipsel -relocation-model=static | FileCheck %s 
+
+define void @foo0(void (i32)* nocapture %f1) nounwind {
+entry:
+; CHECK: jalr $25
+  tail call void %f1(i32 13) nounwind
+  ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
new file mode 100644
index 0000000..dbce3c39
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+@gl2 = external global i64
+@gl1 = external global i64
+@gl0 = external global i64
+
+define void @foo1() nounwind {
+entry:
+; CHECK: foo1
+; CHECK: daddu
+  %0 = load i64* @gl1, align 8
+  %1 = load i64* @gl0, align 8
+  %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
+  store i64 %2, i64* @gl2, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index b5db58a..4b31a88 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -8,10 +8,10 @@ entry:
 ; CHECK: #APP
 ; CHECK: sw $4, 0($[[T0]])
 ; CHECK: #NO_APP
-; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
 ; CHECK: #APP
 ; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
 ; CHECK: #NO_APP
+; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
 ; CHECK: sw  $[[T3]], 0($[[T1]])
 
   %l1 = alloca i32, align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 579a319..b7c9a9c 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,8 +1,4 @@
-; DISABLED: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
 
 %struct.S1 = type { [65536 x i8] }
 
@@ -11,8 +7,8 @@
 define void @f() nounwind {
 entry:
 ; CHECK:  lui $at, 65534
-; CHECK:  addu  $at, $sp, $at
-; CHECK:  addiu $sp, $at, -24
+; CHECK:  addiu $at, $at, -24
+; CHECK:  addu  $sp, $sp, $at
 ; CHECK:  .cprestore  65536
 
   %agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
new file mode 100644
index 0000000..0587d32
--- /dev/null
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 0000000..09745fb
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load float* %arrayidx2, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store float %0, float* %arrayidx2, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load double* %arrayidx2, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store double %0, double* %arrayidx2, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
new file mode 100644
index 0000000..b2b67e5
--- /dev/null
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @t1(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclz
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @t3(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclo 
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
+
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
new file mode 100644
index 0000000..fa81b72
--- /dev/null
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -0,0 +1,11 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+@gl = global i64 1250999896321, align 8
+
+; CHECK: 8byte
+define i64 @foo1() nounwind readonly {
+entry:
+  %0 = load i64* @gl, align 8
+  ret i64 %0
+}
+
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
new file mode 100644
index 0000000..02a35f8
--- /dev/null
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -0,0 +1,26 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s 
+
+define i64 @zext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; CHECK: dsrl ${{[0-9]+}}, $[[R1]], 32
+  %add = add i32 %a, 2
+  %conv = zext i32 %add to i64
+  ret i64 %conv
+}
+
+define i64 @sext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+  %conv = sext i32 %a to i64
+  ret i64 %conv
+}
+
+define i64 @i64_float(float %f) nounwind readnone {
+entry:
+; CHECK: trunc.l.s 
+  %conv = fptosi float %f to i64
+  ret i64 %conv
+}
+
diff --git a/test/CodeGen/Mips/mips64extins.ll b/test/CodeGen/Mips/mips64extins.ll
new file mode 100644
index 0000000..14f92ca
--- /dev/null
+++ b/test/CodeGen/Mips/mips64extins.ll
@@ -0,0 +1,55 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s 
+
+define i64 @dext(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10
+  %shr = lshr i64 %i, 5
+  %and = and i64 %shr, 1023
+  ret i64 %and
+}
+
+define i64 @dextm(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 34
+  %shr = lshr i64 %i, 5
+  %and = and i64 %shr, 17179869183
+  ret i64 %and
+}
+
+define i64 @dextu(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 34, 6
+  %shr = lshr i64 %i, 34
+  %and = and i64 %shr, 63
+  ret i64 %and
+}
+
+define i64 @dins(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10
+  %shl2 = shl i64 %j, 8
+  %and = and i64 %shl2, 261888
+  %and3 = and i64 %i, -261889
+  %or = or i64 %and3, %and
+  ret i64 %or
+}
+
+define i64 @dinsm(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 10, 33
+  %shl4 = shl i64 %j, 10
+  %and = and i64 %shl4, 8796093021184
+  %and5 = and i64 %i, -8796093021185
+  %or = or i64 %and5, %and
+  ret i64 %or
+}
+
+define i64 @dinsu(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 40, 13
+  %shl4 = shl i64 %j, 40
+  %and = and i64 %shl4, 9006099743113216
+  %and5 = and i64 %i, -9006099743113217
+  %or = or i64 %and5, %and
+  ret i64 %or
+}
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
new file mode 100644
index 0000000..17716da
--- /dev/null
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -0,0 +1,7 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define double @foo1() nounwind readnone {
+entry:
+; CHECK: dmtc1 $zero
+  ret double 0.000000e+00
+}
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index b8f3ca9..24647b2 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @f0 = common global float 0.000000e+00, align 4
 @d0 = common global double 0.000000e+00, align 8
@@ -12,7 +12,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f0, align 4
   ret float %0
@@ -24,7 +24,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d0, align 8 
   ret double %0
@@ -36,7 +36,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f1, align 4 
   store float %0, float* @f0, align 4 
@@ -49,7 +49,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d1, align 8 
   store double %0, double* @d0, align 8 
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
new file mode 100644
index 0000000..1fc8636
--- /dev/null
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i64 @foo3() nounwind readnone {
+entry:
+; CHECK: foo3
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136
+  ret i64 305419896
+}
+
+define i64 @foo6() nounwind readnone {
+entry:
+; CHECK: foo6
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i64 33332
+}
+
+define i64 @foo7() nounwind readnone {
+entry:
+; CHECK: foo7
+; CHECK: daddiu ${{[0-9]+}}, $zero, -32204
+  ret i64 -32204
+}
+
+define i64 @foo9() nounwind readnone {
+entry:
+; CHECK: foo9
+; CHECK: lui $[[R0:[0-9]+]], 583
+; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], -30001
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17
+; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398
+  ret i64 1311768467284833366
+}
+
+define i64 @foo10() nounwind readnone {
+entry:
+; CHECK: foo10
+; CHECK: lui $[[R0:[0-9]+]], 34661
+; CHECK: daddiu  ${{[0-9]+}}, $[[R0]], 17185
+  ret i64 -8690466096928522240
+}
+
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index c9812a2..0418311 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r1 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
@@ -116,12 +116,12 @@ entry:
   ret i64 %rem
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 
 define i64 @f18(i64 %X) nounwind readnone {
 entry:
 ; CHECK: dclz $2, $4
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
   ret i64 %tmp1
 }
 
@@ -129,7 +129,7 @@ define i64 @f19(i64 %X) nounwind readnone {
 entry:
 ; CHECK: dclo $2, $4
   %neg = xor i64 %X, -1
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
   ret i64 %tmp1
 }
 
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index fdf496b..0e310a8 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @c = common global i8 0, align 4
 @s = common global i16 0, align 4
@@ -16,7 +16,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: lb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @c, align 4
   %conv = sext i8 %0 to i64
@@ -29,7 +29,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: lh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @s, align 4
   %conv = sext i16 %0 to i64
@@ -42,7 +42,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: lw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @i, align 4
   %conv = sext i32 %0 to i64
@@ -55,7 +55,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: ld ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l, align 8
   ret i64 %0
@@ -67,7 +67,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N64: lbu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(uc)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @uc, align 4
   %conv = zext i8 %0 to i64
@@ -80,7 +80,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N64: lhu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(us)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @us, align 4
   %conv = zext i16 %0 to i64
@@ -93,7 +93,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N64: lwu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(ui)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @ui, align 4
   %conv = zext i32 %0 to i64
@@ -106,7 +106,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: sb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i8
@@ -120,7 +120,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: sh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i16
@@ -134,7 +134,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: sw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i32
@@ -148,7 +148,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: sd ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   store i64 %0, i64* @l, align 8
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
new file mode 100644
index 0000000..54d504f
--- /dev/null
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define void @foo3() nounwind {
+entry:
+; CHECK: daddiu ${{[0-9]+}}, $sp
+  %a = alloca i32, align 4
+  call void @foo1(i32* %a) nounwind
+  ret void
+}
+
+declare void @foo1(i32*)
+
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
new file mode 100644
index 0000000..fd036a2
--- /dev/null
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mflo
+  %mul = mul i64 %a1, %a0
+  ret i64 %mul
+}
+
+define i64 @m1(i64 %a) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mfhi
+  %div = sdiv i64 %a, 3
+  ret i64 %div
+}
+
+define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mflo
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mflo
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mfhi
+  %rem = urem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mfhi
+  %rem = srem i64 %a0, %a1
+  ret i64 %rem
+}
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index cc5e508..45d1c95 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -44,21 +44,21 @@ entry:
 
 define i64 @f6(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shl = shl i64 %a0, 40
   ret i64 %shl
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = ashr i64 %a0, 40
   ret i64 %shr
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = lshr i64 %a0, 40
   ret i64 %shr
 }
@@ -94,7 +94,7 @@ entry:
 
 define i64 @f12(i64 %a0) nounwind readnone {
 entry:
-; CHECK: drotr32 ${{[0-9]+}}, ${{[0-9]+}}, 22
+; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 54
   %shl = shl i64 %a0, 10
   %shr = lshr i64 %a0, 54
   %or = or i64 %shl, %shr
diff --git a/test/CodeGen/Mips/mipslopat.ll b/test/CodeGen/Mips/mipslopat.ll
index 0279828..1f433b9 100644
--- a/test/CodeGen/Mips/mipslopat.ll
+++ b/test/CodeGen/Mips/mipslopat.ll
@@ -6,7 +6,7 @@
 
 define void @simple_vol_file() nounwind {
 entry:
-  %tmp = volatile load i32** @stat_vol_ptr_int, align 4
+  %tmp = load volatile i32** @stat_vol_ptr_int, align 4
   %0 = bitcast i32* %tmp to i8*
   call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
   %tmp1 = load i32** @stat_ptr_vol_int, align 4
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index e673480..c5cbc7a 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -12,20 +12,20 @@ define void @f1() nounwind {
 entry:
 ; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)($gp)
 ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
-; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
 ; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: sw  $[[R2]], 16($sp)
-; CHECK: sw  $[[R7]], 20($sp)
-; CHECK: sw  $[[R3]], 24($sp)
-; CHECK: sw  $[[R4]], 28($sp)
-; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
 ; CHECK: sw  $[[R6]], 36($sp)
-; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
+; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: sw  $[[R4]], 28($sp)
+; CHECK: sw  $[[R3]], 24($sp)
+; CHECK: sw  $[[R7]], 20($sp)
+; CHECK: sw  $[[R2]], 16($sp)
 ; CHECK: lw  $7, 4($[[R0]])
+; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
   %agg.tmp10 = alloca %struct.S3, align 4
   call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
   call void @callee2(%struct.S2* byval @f1.s2) nounwind
@@ -44,20 +44,20 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
 define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: lw  $4, 88($sp)
 ; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
 ; CHECK: lw  $[[R2:[0-9]+]], 68($sp)
 ; CHECK: lh  $[[R1:[0-9]+]], 66($sp)
 ; CHECK: lb  $[[R0:[0-9]+]], 64($sp)
-; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
-; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
-; CHECK: lw  $4, 88($sp)
-; CHECK: sw  $[[R3]], 16($sp)
-; CHECK: sw  $[[R4]], 20($sp)
-; CHECK: sw  $[[R2]], 24($sp)
-; CHECK: sw  $[[R1]], 28($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: sw  $[[R1]], 28($sp)
+; CHECK: sw  $[[R2]], 24($sp)
+; CHECK: sw  $[[R4]], 20($sp)
+; CHECK: sw  $[[R3]], 16($sp)
 ; CHECK: mfc1 $6, $f[[F0]]
 
   %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
@@ -81,12 +81,12 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
 define void @f3(%struct.S2* nocapture byval %s2) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $4, 56($sp)
-; CHECK: sw  $5, 60($sp)
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
-; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $4, 56($sp)
 ; CHECK: lw  $4, 56($sp)
+; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
 ; CHECK: sw  $[[R0]], 24($sp)
 
   %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
@@ -100,14 +100,14 @@ entry:
 define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $5, 60($sp)
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: lw  $4, 68($sp)
 ; CHECK: lw  $[[R1:[0-9]+]], 88($sp)
 ; CHECK: lb  $[[R0:[0-9]+]], 60($sp)
-; CHECK: lw  $4, 68($sp)
-; CHECK: sw  $[[R1]], 24($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: sw  $[[R1]], 24($sp)
 
   %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
   %tmp = load i32* %i, align 4, !tbaa !0
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 4cc48f0..d1a67fd 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,19 +1,20 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -march=mips > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep call.*\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep lw.*\\\$baz %t
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define private void @foo() {
-        ret void
+; CHECK: foo:
+  ret void
 }
 
 @baz = private global i32 4
 
 define i32 @bar() {
-        call void @foo()
-	%1 = load i32* @baz, align 4
-        ret i32 %1
+; CHECK: bar:
+; CHECK: call16($foo)
+; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
+; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
+  call void @foo()
+  %1 = load i32* @baz, align 4
+  ret i32 %1
 }
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 8e27f4a..4f3cfb7 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
 
 ; CHECK:  rotrv $2, $4
 define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
new file mode 100644
index 0000000..da1e036
--- /dev/null
+++ b/test/CodeGen/Mips/swzero.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+%struct.unaligned = type <{ i32 }>
+
+define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
+entry:
+; CHECK: usw $zero
+  %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+  store i32 0, i32* %x, align 1
+  ret void
+}
+
+define void @zero_a(i32* nocapture %p) nounwind {
+entry:
+; CHECK: sw $zero
+  store i32 0, i32* %p, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b0474b4..a3c4768 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
 ; RUN: llc -march=mipsel -relocation-model=static < %s \
 ; RUN:                             | FileCheck %s -check-prefix=STATIC
-
+; RUN: llc -march=mipsel -relocation-model=static < %s \
+; RUN:   -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
 
 @t1 = thread_local global i32 0, align 4
 
@@ -39,8 +40,32 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
+; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
+; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+; STATIC:   lui     $gp, %hi(__gnu_local_gp)
+; STATIC:   addiu   $gp, $gp, %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
 ; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
 ; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
 ; STATIC:   lw      $2, 0($[[R1]])
 }
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+; CHECK: f3:
+
+; PIC:   addiu   $4, $gp, %tlsldm(f3.i)
+; PIC:   jalr    $25
+; PIC:   lui     $[[R0:[0-9]+]], %dtprel_hi(f3.i)
+; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
+; PIC:   lw      ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
+
+  %0 = load i32* @f3.i, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @f3.i, align 4
+  ret i32 %inc
+}
+
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 433e896..6a087ba 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,27 +9,27 @@
 
 define void @foo1() nounwind {
 entry:
-; CHECK-EL: lw  $25, %call16(foo2)
 ; CHECK-EL: ulhu  $4, 2
+; CHECK-EL: lw  $25, %call16(foo2)
 ; CHECK-EL: lw  $[[R0:[0-9]+]], %got(s4)
 ; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
-; CHECK-EL: ulhu  $[[R2:[0-9]+]], 4($[[R0]])
 ; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
+; CHECK-EL: ulhu  $[[R2:[0-9]+]], 4($[[R0]])
+; CHECK-EL: or  $5, $[[R2]], $[[R3]]
 ; CHECK-EL: ulw $4, 0($[[R0]])
 ; CHECK-EL: lw  $25, %call16(foo4)
-; CHECK-EL: or  $5, $[[R2]], $[[R3]]
 
 ; CHECK-EB: ulhu  $[[R0:[0-9]+]], 2
-; CHECK-EB: lw  $25, %call16(foo2)
 ; CHECK-EB: sll $4, $[[R0]], 16
+; CHECK-EB: lw  $25, %call16(foo2)
 ; CHECK-EB: lw  $[[R1:[0-9]+]], %got(s4)
-; CHECK-EB: ulhu  $[[R2:[0-9]+]], 4($[[R1]])
 ; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
-; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
 ; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
+; CHECK-EB: ulhu  $[[R2:[0-9]+]], 4($[[R1]])
+; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
+; CHECK-EB: or  $5, $[[R4]], $[[R5]]
 ; CHECK-EB: ulw $4, 0($[[R1]])
 ; CHECK-EB: lw  $25, %call16(foo4)
-; CHECK-EB: or  $5, $[[R4]], $[[R5]]
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
   tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
new file mode 100644
index 0000000..b890e1d
--- /dev/null
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
+@g1 = external global i32
+
+define i32 @foo0(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movn
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 0, i32 %0
+  ret i32 %cond
+}
+
+define i32 @foo1(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movz
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 %0, i32 0
+  ret i32 %cond
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
index a643d25..f55070a 100644
--- a/test/CodeGen/PTX/cvt.ll
+++ b/test/CodeGen/PTX/cvt.ll
@@ -172,9 +172,9 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
 ; f32
 
 define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: mov.b32 %f0, 1065353216;
-; CHECK: mov.b32 %f1, 0;
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f0, %f1, %p{{[0-9]+}};
+; CHECK: mov.b32 %f0, 0;
+; CHECK: mov.b32 %f1, 1065353216;
+; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
 ; CHECK: ret;
 	%a = uitofp i1 %x to float
 	ret float %a
@@ -232,9 +232,9 @@ define ptx_device float @cvt_f32_s64(i64 %x) {
 ; f64
 
 define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: mov.b64 %fd0, 4575657221408423936;
-; CHECK: mov.b64 %fd1, 0;
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd0, %fd1, %p{{[0-9]+}};
+; CHECK: mov.b64 %fd0, 0;
+; CHECK: mov.b64 %fd1, 4575657221408423936;
+; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
 ; CHECK: ret;
 	%a = uitofp i1 %x to double
 	ret double %a
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
deleted file mode 100644
index 2c304b5..0000000
--- a/test/CodeGen/PTX/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PTX] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 81fd33a..e55820d 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
 @array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
 @array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
 @array_shared_i16 = external addrspace(4) global [10 x i16]
 
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
 @array_i32 = external global [10 x i32]
 
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
 @array_constant_i32 = external addrspace(1) constant [10 x i32]
 
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
 @array_shared_i32 = external addrspace(4) global [10 x i32]
 
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
 @array_i64 = external global [10 x i64]
 
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
 @array_constant_i64 = external addrspace(1) constant [10 x i64]
 
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
 @array_shared_i64 = external addrspace(4) global [10 x i64]
 
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
 @array_float = external global [10 x float]
 
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
 @array_constant_float = external addrspace(1) constant [10 x float]
 
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
 @array_shared_float = external addrspace(4) global [10 x float]
 
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
 @array_double = external global [10 x double]
 
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
 @array_constant_double = external addrspace(1) constant [10 x double]
 
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
 @array_shared_double = external addrspace(4) global [10 x double]
 
 
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
new file mode 100644
index 0000000..e748f7f
--- /dev/null
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PTX' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
index ad7b341..603c3ba 100644
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -1,8 +1,13 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | grep -v "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
 
 define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
 entry:
+; FMA: mad.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
   %a = fmul float %x, %y
   %b = fadd float %a, %z
   ret float %b
@@ -10,6 +15,9 @@ entry:
 
 define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
 entry:
+; FMA: mad.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
   %a = fmul double %x, %y
   %b = fadd double %a, %z
   ret double %b
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index 75555a7..9e501be 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -31,31 +31,31 @@ define ptx_device double @t1_f64() {
 }
 
 define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i16 %x
 }
 
 define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i32 %x
 }
 
 define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i64 %x
 }
 
 define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret float %x
 }
 
 define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret double %x
 }
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
index 09015da..377f173 100644
--- a/test/CodeGen/PTX/parameter-order.ll
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}})
+; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
 define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
 ; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
 	%result = sub i32 %b, %c
diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll
new file mode 100644
index 0000000..f901b20
--- /dev/null
+++ b/test/CodeGen/PTX/printf.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
+
+declare i32 @printf(i8*, ...)
+
+@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
+
+define ptx_device void @t1_printf() {
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
+; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+    %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
+	ret void
+}
+
+@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
+
+define ptx_device void @t2_printf() {
+; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
+; CHECK: cvta.local.u64  %rd{{[0-9]+}}, __local{{[0-9+]}};
+; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+  %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
+  ret void
+}
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
index 63ef58c..c794363 100644
--- a/test/CodeGen/PTX/st.ll
+++ b/test/CodeGen/PTX/st.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
 @array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
 @array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
 @array_shared_i16 = external addrspace(4) global [10 x i16]
 
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
 @array_i32 = external global [10 x i32]
 
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
 @array_constant_i32 = external addrspace(1) constant [10 x i32]
 
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
 @array_shared_i32 = external addrspace(4) global [10 x i32]
 
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
 @array_i64 = external global [10 x i64]
 
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
 @array_constant_i64 = external addrspace(1) constant [10 x i64]
 
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
 @array_shared_i64 = external addrspace(4) global [10 x i64]
 
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
 @array_float = external global [10 x float]
 
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
 @array_constant_float = external addrspace(1) constant [10 x float]
 
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
 @array_shared_float = external addrspace(4) global [10 x float]
 
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
 @array_double = external global [10 x double]
 
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
 @array_constant_double = external addrspace(1) constant [10 x double]
 
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
 @array_shared_double = external addrspace(4) global [10 x double]
 
 
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
deleted file mode 100644
index 57ed250..0000000
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.2.0"
-        %struct.Point = type { double, double, double }
-
-define void @offset(%struct.Point* %pt, double %x, double %y, double %z) {
-entry:
-        %tmp = getelementptr %struct.Point* %pt, i32 0, i32 0           ; <double*> [#uses=2]
-        %tmp.upgrd.1 = load double* %tmp                ; <double> [#uses=1]
-        %tmp2 = fadd double %tmp.upgrd.1, %x             ; <double> [#uses=1]
-        store double %tmp2, double* %tmp
-        %tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1          ; <double*> [#uses=2]
-        %tmp7 = load double* %tmp6              ; <double> [#uses=1]
-        %tmp9 = fadd double %tmp7, %y            ; <double> [#uses=1]
-        store double %tmp9, double* %tmp6
-        %tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2         ; <double*> [#uses=2]
-        %tmp14 = load double* %tmp13            ; <double> [#uses=1]
-        %tmp16 = fadd double %tmp14, %z          ; <double> [#uses=1]
-        store double %tmp16, double* %tmp13
-        ret void
-}
-
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index cca9e65..3620b0e 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -2,11 +2,11 @@
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
         %tmp19 = load i64* %t
-        %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 )             ; <i64> [#uses=1]
+        %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true )             ; <i64> [#uses=1]
         %tmp23 = trunc i64 %tmp22 to i32
         %tmp89 = add i32 %tmp23, -64          ; <i32> [#uses=1]
         %tmp90 = add i32 %tmp89, 0            ; <i32> [#uses=1]
         ret i32 %tmp90
 }
 
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index e50fac4..d10291e 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 declare i8* @bar(i32)
 
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index 9f35b83..fb8cdce 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32
 
 	%struct._cpp_strbuf = type { i8*, i32, i32 }
 	%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index dd425f5..f256bca 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64
 
 define i16 @test(i8* %d1, i16* %d2) {
  %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 7b6d491..e7a1cf6 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -12,7 +12,7 @@ declare void @IODelay(i32)
 
 define i32 @_Z14ProgramByWordsPvyy(i8* %buffer, i64 %Offset, i64 %bufferSize) nounwind  {
 entry:
-	volatile store i8 -1, i8* null, align 1
+	store volatile i8 -1, i8* null, align 1
 	%tmp28 = icmp eq i8 0, 0		; <i1> [#uses=1]
 	br i1 %tmp28, label %bb107, label %bb
 
@@ -43,7 +43,7 @@ bb68:		; preds = %bb31
 	%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32		; <i32> [#uses=1]
 	%tmp202122.i = inttoptr i32 %tmp2021.i to i8*		; <i8*> [#uses=1]
 	tail call void @IODelay( i32 500 ) nounwind 
-	%tmp53.i = volatile load i16* null, align 2		; <i16> [#uses=2]
+	%tmp53.i = load volatile i16* null, align 2		; <i16> [#uses=2]
 	%tmp5455.i = zext i16 %tmp53.i to i32		; <i32> [#uses=1]
 	br i1 false, label %bb.i, label %bb65.i
 
@@ -59,7 +59,7 @@ bb70.i:		; preds = %bb65.i
 	ret i32 0
 
 _Z24unlock_then_erase_sectory.exit:		; preds = %bb65.i
-	volatile store i8 -1, i8* %tmp202122.i, align 1
+	store volatile i8 -1, i8* %tmp202122.i, align 1
 	%tmp93 = add i64 0, %Pos.0.reg2mem.0		; <i64> [#uses=2]
 	%tmp98 = add i64 0, %Offset		; <i64> [#uses=1]
 	%tmp100 = icmp ugt i64 %tmp98, %tmp93		; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index b73382e..974a99a 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
@@ -7,21 +7,27 @@ target triple = "powerpc-apple-darwin9.6"
 define void @foo() nounwind {
 entry:
 ;CHECK:  mfcr r2
+;CHECK:  lis r3, 1
 ;CHECK:  rlwinm r2, r2, 8, 0, 31
-;CHECK:  lis r0, 1
-;CHECK:  ori r0, r0, 34540
-;CHECK:  stwx r2, r1, r0
+;CHECK:  ori r3, r3, 34524
+;CHECK:  stwx r2, r1, r3
+; Make sure that the register scavenger returns the same temporary register.
+;CHECK:  mfcr r2
+;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 12, 0, 31
+;CHECK:  ori r3, r3, 34520
+;CHECK:  stwx r2, r1, r3
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
   call void @bar(i8* %x1) nounwind
-  call void asm sideeffect "", "~{cr2}"() nounwind
+  call void asm sideeffect "", "~{cr2},~{cr3}"() nounwind
   br label %return
 
 return:                                           ; preds = %entry
-;CHECK:  lis r0, 1
-;CHECK:  ori r0, r0, 34540
-;CHECK:  lwzx r2, r1, r0
+;CHECK:  lis r3, 1
+;CHECK:  ori r3, r3, 34524
+;CHECK:  lwzx r2, r1, r3
 ;CHECK:  rlwinm r2, r2, 24, 0, 31
 ;CHECK:  mtcrf 32, r2
   ret void
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
new file mode 100644
index 0000000..6161b55
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+
+@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
+@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s122(i32 %n1, i32 %n3) nounwind {
+entry:
+  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  %sub = add nsw i32 %n1, -1
+  %cmp316 = icmp slt i32 %sub, 32000
+  br i1 %cmp316, label %entry.split.us, label %for.end.7
+
+entry.split.us:                                   ; preds = %entry
+  %0 = sext i32 %sub to i64
+  %1 = sext i32 %n3 to i64
+  br label %for.body4.lr.ph.us
+
+for.body4.us:                                     ; preds = %for.body4.lr.ph.us, %for.body4.us
+  %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
+  %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
+  %indvars.iv.next21 = add i64 %indvars.iv20, 1
+  %sub5.us = sub i64 31999, %indvars.iv20
+  %sext = shl i64 %sub5.us, 32
+  %idxprom.us = ashr exact i64 %sext, 32
+  %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
+  %2 = load float* %arrayidx.us, align 4, !tbaa !5
+  %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+  %3 = load float* %arrayidx7.us, align 4, !tbaa !5
+  %add8.us = fadd float %3, %2
+  store float %add8.us, float* %arrayidx7.us, align 4, !tbaa !5
+  %indvars.iv.next = add i64 %indvars.iv, %1
+  %4 = trunc i64 %indvars.iv.next to i32
+  %cmp3.us = icmp slt i32 %4, 32000
+  br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1
+
+for.body4.lr.ph.us:                               ; preds = %entry.split.us, %for.end.us.4
+  %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
+  br label %for.body4.us
+
+for.end12:                                        ; preds = %for.end.7, %for.end.us.4
+  %call13 = tail call i64 @clock() nounwind
+  %sub14 = sub nsw i64 %call13, %call1
+  %conv = sitofp i64 %sub14 to double
+  %div = fdiv double %conv, 1.000000e+06
+  %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+  tail call void @check(i32 1)
+  ret i32 0
+
+for.body4.lr.ph.us.1:                             ; preds = %for.body4.us
+  %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.1
+
+for.body4.us.1:                                   ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
+  %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
+  %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
+  %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
+  %sub5.us.1 = sub i64 31999, %indvars.iv20.1
+  %sext23 = shl i64 %sub5.us.1, 32
+  %idxprom.us.1 = ashr exact i64 %sext23, 32
+  %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
+  %5 = load float* %arrayidx.us.1, align 4, !tbaa !5
+  %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
+  %6 = load float* %arrayidx7.us.1, align 4, !tbaa !5
+  %add8.us.1 = fadd float %6, %5
+  store float %add8.us.1, float* %arrayidx7.us.1, align 4, !tbaa !5
+  %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
+  %7 = trunc i64 %indvars.iv.next.1 to i32
+  %cmp3.us.1 = icmp slt i32 %7, 32000
+  br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
+
+for.body4.lr.ph.us.2:                             ; preds = %for.body4.us.1
+  %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.2
+
+for.body4.us.2:                                   ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
+  %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
+  %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
+  %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
+  %sub5.us.2 = sub i64 31999, %indvars.iv20.2
+  %sext24 = shl i64 %sub5.us.2, 32
+  %idxprom.us.2 = ashr exact i64 %sext24, 32
+  %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
+  %8 = load float* %arrayidx.us.2, align 4, !tbaa !5
+  %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
+  %9 = load float* %arrayidx7.us.2, align 4, !tbaa !5
+  %add8.us.2 = fadd float %9, %8
+  store float %add8.us.2, float* %arrayidx7.us.2, align 4, !tbaa !5
+  %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
+  %10 = trunc i64 %indvars.iv.next.2 to i32
+  %cmp3.us.2 = icmp slt i32 %10, 32000
+  br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
+
+for.body4.lr.ph.us.3:                             ; preds = %for.body4.us.2
+  %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.3
+
+for.body4.us.3:                                   ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
+  %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
+  %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
+  %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
+  %sub5.us.3 = sub i64 31999, %indvars.iv20.3
+  %sext25 = shl i64 %sub5.us.3, 32
+  %idxprom.us.3 = ashr exact i64 %sext25, 32
+  %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
+  %11 = load float* %arrayidx.us.3, align 4, !tbaa !5
+  %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
+  %12 = load float* %arrayidx7.us.3, align 4, !tbaa !5
+  %add8.us.3 = fadd float %12, %11
+  store float %add8.us.3, float* %arrayidx7.us.3, align 4, !tbaa !5
+  %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
+  %13 = trunc i64 %indvars.iv.next.3 to i32
+  %cmp3.us.3 = icmp slt i32 %13, 32000
+  br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
+
+for.body4.lr.ph.us.4:                             ; preds = %for.body4.us.3
+  %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.4
+
+for.body4.us.4:                                   ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
+  %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
+  %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
+  %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
+  %sub5.us.4 = sub i64 31999, %indvars.iv20.4
+  %sext26 = shl i64 %sub5.us.4, 32
+  %idxprom.us.4 = ashr exact i64 %sext26, 32
+  %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
+  %14 = load float* %arrayidx.us.4, align 4, !tbaa !5
+  %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
+  %15 = load float* %arrayidx7.us.4, align 4, !tbaa !5
+  %add8.us.4 = fadd float %15, %14
+  store float %add8.us.4, float* %arrayidx7.us.4, align 4, !tbaa !5
+  %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
+  %16 = trunc i64 %indvars.iv.next.4 to i32
+  %cmp3.us.4 = icmp slt i32 %16, 32000
+  br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
+
+for.end.us.4:                                     ; preds = %for.body4.us.4
+  %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %inc.us.4 = add nsw i32 %nl.019.us, 5
+  %exitcond.4 = icmp eq i32 %inc.us.4, 200000
+  br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
+
+for.end.7:                                        ; preds = %entry, %for.end.7
+  %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
+  %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %inc.7 = add nsw i32 %nl.019, 8
+  %exitcond.7 = icmp eq i32 %inc.7, 200000
+  br i1 %exitcond.7, label %for.end12, label %for.end.7
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
new file mode 100644
index 0000000..52bf6c7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+@temp = common global float 0.000000e+00, align 4
+
+@.str81 = private unnamed_addr constant [6 x i8] c"s3110\00", align 1
+@.str235 = private unnamed_addr constant [15 x i8] c"S3110\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s3110() nounwind {
+entry:
+  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str81, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  br label %for.body
+
+for.body:                                         ; preds = %for.end17, %entry
+  %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
+  %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16, !tbaa !5
+  br label %for.cond5.preheader
+
+for.cond5.preheader:                              ; preds = %for.inc15, %for.body
+  %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.inc15 ]
+  %max.139 = phi float [ %0, %for.body ], [ %max.3.15, %for.inc15 ]
+  %xindex.138 = phi i32 [ 0, %for.body ], [ %xindex.3.15, %for.inc15 ]
+  %yindex.137 = phi i32 [ 0, %for.body ], [ %yindex.3.15, %for.inc15 ]
+  br label %for.body7
+
+for.body7:                                        ; preds = %for.body7, %for.cond5.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next.15, %for.body7 ]
+  %max.235 = phi float [ %max.139, %for.cond5.preheader ], [ %max.3.15, %for.body7 ]
+  %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
+  %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
+  %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
+  %1 = load float* %arrayidx9, align 16, !tbaa !5
+  %cmp10 = fcmp ogt float %1, %max.235
+  %2 = trunc i64 %indvars.iv to i32
+  %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
+  %3 = trunc i64 %indvars.iv42 to i32
+  %xindex.3 = select i1 %cmp10, i32 %3, i32 %xindex.234
+  %max.3 = select i1 %cmp10, float %1, float %max.235
+  %indvars.iv.next45 = or i64 %indvars.iv, 1
+  %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
+  %4 = load float* %arrayidx9.1, align 4, !tbaa !5
+  %cmp10.1 = fcmp ogt float %4, %max.3
+  %5 = trunc i64 %indvars.iv.next45 to i32
+  %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
+  %xindex.3.1 = select i1 %cmp10.1, i32 %3, i32 %xindex.3
+  %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
+  %indvars.iv.next.146 = or i64 %indvars.iv, 2
+  %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
+  %6 = load float* %arrayidx9.2, align 8, !tbaa !5
+  %cmp10.2 = fcmp ogt float %6, %max.3.1
+  %7 = trunc i64 %indvars.iv.next.146 to i32
+  %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
+  %xindex.3.2 = select i1 %cmp10.2, i32 %3, i32 %xindex.3.1
+  %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
+  %indvars.iv.next.247 = or i64 %indvars.iv, 3
+  %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
+  %8 = load float* %arrayidx9.3, align 4, !tbaa !5
+  %cmp10.3 = fcmp ogt float %8, %max.3.2
+  %9 = trunc i64 %indvars.iv.next.247 to i32
+  %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
+  %xindex.3.3 = select i1 %cmp10.3, i32 %3, i32 %xindex.3.2
+  %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
+  %indvars.iv.next.348 = or i64 %indvars.iv, 4
+  %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
+  %10 = load float* %arrayidx9.4, align 16, !tbaa !5
+  %cmp10.4 = fcmp ogt float %10, %max.3.3
+  %11 = trunc i64 %indvars.iv.next.348 to i32
+  %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
+  %xindex.3.4 = select i1 %cmp10.4, i32 %3, i32 %xindex.3.3
+  %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
+  %indvars.iv.next.449 = or i64 %indvars.iv, 5
+  %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
+  %12 = load float* %arrayidx9.5, align 4, !tbaa !5
+  %cmp10.5 = fcmp ogt float %12, %max.3.4
+  %13 = trunc i64 %indvars.iv.next.449 to i32
+  %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
+  %xindex.3.5 = select i1 %cmp10.5, i32 %3, i32 %xindex.3.4
+  %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
+  %indvars.iv.next.550 = or i64 %indvars.iv, 6
+  %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
+  %14 = load float* %arrayidx9.6, align 8, !tbaa !5
+  %cmp10.6 = fcmp ogt float %14, %max.3.5
+  %15 = trunc i64 %indvars.iv.next.550 to i32
+  %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
+  %xindex.3.6 = select i1 %cmp10.6, i32 %3, i32 %xindex.3.5
+  %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
+  %indvars.iv.next.651 = or i64 %indvars.iv, 7
+  %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
+  %16 = load float* %arrayidx9.7, align 4, !tbaa !5
+  %cmp10.7 = fcmp ogt float %16, %max.3.6
+  %17 = trunc i64 %indvars.iv.next.651 to i32
+  %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
+  %xindex.3.7 = select i1 %cmp10.7, i32 %3, i32 %xindex.3.6
+  %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
+  %indvars.iv.next.752 = or i64 %indvars.iv, 8
+  %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
+  %18 = load float* %arrayidx9.8, align 16, !tbaa !5
+  %cmp10.8 = fcmp ogt float %18, %max.3.7
+  %19 = trunc i64 %indvars.iv.next.752 to i32
+  %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
+  %xindex.3.8 = select i1 %cmp10.8, i32 %3, i32 %xindex.3.7
+  %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
+  %indvars.iv.next.853 = or i64 %indvars.iv, 9
+  %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
+  %20 = load float* %arrayidx9.9, align 4, !tbaa !5
+  %cmp10.9 = fcmp ogt float %20, %max.3.8
+  %21 = trunc i64 %indvars.iv.next.853 to i32
+  %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
+  %xindex.3.9 = select i1 %cmp10.9, i32 %3, i32 %xindex.3.8
+  %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
+  %indvars.iv.next.954 = or i64 %indvars.iv, 10
+  %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
+  %22 = load float* %arrayidx9.10, align 8, !tbaa !5
+  %cmp10.10 = fcmp ogt float %22, %max.3.9
+  %23 = trunc i64 %indvars.iv.next.954 to i32
+  %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
+  %xindex.3.10 = select i1 %cmp10.10, i32 %3, i32 %xindex.3.9
+  %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
+  %indvars.iv.next.1055 = or i64 %indvars.iv, 11
+  %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
+  %24 = load float* %arrayidx9.11, align 4, !tbaa !5
+  %cmp10.11 = fcmp ogt float %24, %max.3.10
+  %25 = trunc i64 %indvars.iv.next.1055 to i32
+  %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
+  %xindex.3.11 = select i1 %cmp10.11, i32 %3, i32 %xindex.3.10
+  %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
+  %indvars.iv.next.1156 = or i64 %indvars.iv, 12
+  %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
+  %26 = load float* %arrayidx9.12, align 16, !tbaa !5
+  %cmp10.12 = fcmp ogt float %26, %max.3.11
+  %27 = trunc i64 %indvars.iv.next.1156 to i32
+  %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
+  %xindex.3.12 = select i1 %cmp10.12, i32 %3, i32 %xindex.3.11
+  %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
+  %indvars.iv.next.1257 = or i64 %indvars.iv, 13
+  %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
+  %28 = load float* %arrayidx9.13, align 4, !tbaa !5
+  %cmp10.13 = fcmp ogt float %28, %max.3.12
+  %29 = trunc i64 %indvars.iv.next.1257 to i32
+  %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
+  %xindex.3.13 = select i1 %cmp10.13, i32 %3, i32 %xindex.3.12
+  %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
+  %indvars.iv.next.1358 = or i64 %indvars.iv, 14
+  %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
+  %30 = load float* %arrayidx9.14, align 8, !tbaa !5
+  %cmp10.14 = fcmp ogt float %30, %max.3.13
+  %31 = trunc i64 %indvars.iv.next.1358 to i32
+  %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
+  %xindex.3.14 = select i1 %cmp10.14, i32 %3, i32 %xindex.3.13
+  %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
+  %indvars.iv.next.1459 = or i64 %indvars.iv, 15
+  %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
+  %32 = load float* %arrayidx9.15, align 4, !tbaa !5
+  %cmp10.15 = fcmp ogt float %32, %max.3.14
+  %33 = trunc i64 %indvars.iv.next.1459 to i32
+  %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
+  %xindex.3.15 = select i1 %cmp10.15, i32 %3, i32 %xindex.3.14
+  %max.3.15 = select i1 %cmp10.15, float %32, float %max.3.14
+  %indvars.iv.next.15 = add i64 %indvars.iv, 16
+  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 256
+  br i1 %exitcond.15, label %for.inc15, label %for.body7
+
+for.inc15:                                        ; preds = %for.body7
+  %indvars.iv.next43 = add i64 %indvars.iv42, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next43 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end17, label %for.cond5.preheader
+
+for.end17:                                        ; preds = %for.inc15
+  %conv = sitofp i32 %xindex.3.15 to float
+  %add = fadd float %max.3.15, %conv
+  %conv18 = sitofp i32 %yindex.3.15 to float
+  %add19 = fadd float %add, %conv18
+  %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
+  %inc22 = add nsw i32 %nl.041, 1
+  %exitcond44 = icmp eq i32 %inc22, 78100
+  br i1 %exitcond44, label %for.end23, label %for.body
+
+for.end23:                                        ; preds = %for.end17
+  %call24 = tail call i64 @clock() nounwind
+  %sub = sub nsw i64 %call24, %call1
+  %conv25 = sitofp i64 %sub to double
+  %div = fdiv double %conv25, 1.000000e+06
+  %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+  %add29 = fadd float %add, 1.000000e+00
+  %add31 = fadd float %add29, %conv18
+  %add32 = fadd float %add31, 1.000000e+00
+  store float %add32, float* @temp, align 4, !tbaa !5
+  tail call void @check(i32 -1)
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
new file mode 100644
index 0000000..a18829e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
+entry:
+  %yy = load i32* %yp
+  %y = add i32 %yy, 1
+  %z = zext i32 %y to i64
+  %z2 = shl i64 %z, 32 
+  store i64 %z2, i64* %xx, align 4
+  ret void
+
+; CHECK: test:
+; CHECK: sldi {{.*}}, {{.*}}, 32
+; Note: it's okay if someday CodeGen gets smart enough to optimize out
+; the shift.
+}
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 466ae80..28dd08c 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP
 
 ; CHECK-PPC32: stw r31, -4(r1)
 ; CHECK-PPC32: lwz r1, 0(r1)
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 302d3df..d07fea7 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -21,12 +21,14 @@ define i32* @f1() nounwind {
 ; PPC32-NOFP: 	lwz r1, 0(r1)
 ; PPC32-NOFP: 	blr 
 
+
 ; PPC32-FP: _f1:
-; PPC32-FP:	stw r31, -4(r1)
 ; PPC32-FP:	lis r0, -1
+; PPC32-FP:	stw r31, -4(r1)
 ; PPC32-FP:	ori r0, r0, 32704
 ; PPC32-FP:	stwux r1, r1, r0
-; ...
+; PPC32-FP:	mr r31, r1
+; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	lwz r1, 0(r1)
 ; PPC32-FP:	lwz r31, -4(r1)
 ; PPC32-FP:	blr 
@@ -42,11 +44,12 @@ define i32* @f1() nounwind {
 
 
 ; PPC64-FP: _f1:
-; PPC64-FP:	std r31, -8(r1)
 ; PPC64-FP:	lis r0, -1
+; PPC64-FP:	std r31, -8(r1)
 ; PPC64-FP:	ori r0, r0, 32640
 ; PPC64-FP:	stdux r1, r1, r0
-; ...
+; PPC64-FP:	mr r31, r1
+; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	ld r1, 0(r1)
 ; PPC64-FP:	ld r31, -8(r1)
 ; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index b10a996..7b0d69c 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r3, 32751}
+; RUN:   grep {stw r4, 32751}
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r3, 32751}
+; RUN:   grep {stw r4, 32751}
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {std r3, 9024}
+; RUN:   grep {std r4, 9024}
 
 define void @test() nounwind {
 	store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
new file mode 100644
index 0000000..932ad7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+  %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+  %a.real = load double* %a.realp
+  %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+  %a.imag = load double* %a.imagp
+  %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+  %b.real = load double* %b.realp
+  %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+  %b.imag = load double* %b.imagp
+  %mul.rl = fmul double %a.real, %b.real
+  %mul.rr = fmul double %a.imag, %b.imag
+  %mul.r = fsub double %mul.rl, %mul.rr
+  %mul.il = fmul double %a.imag, %b.real
+  %mul.ir = fmul double %a.real, %b.imag
+  %mul.i = fadd double %mul.il, %mul.ir
+  %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+  %c.real = load double* %c.realp
+  %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+  %c.imag = load double* %c.imagp
+  %add.r = fadd double %mul.r, %c.real
+  %add.i = fadd double %mul.i, %c.imag
+  %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+  %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+  store double %add.r, double* %real
+  store double %add.i, double* %imag
+  ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 318ccb0..9a456b6 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -2,8 +2,8 @@
 
 declare void @bar(i64 %x, i64 %y)
 
-; CHECK: li 4, 2
 ; CHECK: li {{[53]}}, 0
+; CHECK: li 4, 2
 ; CHECK: li 6, 3
 ; CHECK: mr {{[53]}}, {{[53]}}
 
diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll
new file mode 100644
index 0000000..386c59e
--- /dev/null
+++ b/test/CodeGen/PowerPC/bl8_elf_nop.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck  %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @clock() nounwind
+
+define i32 @func() {
+entry:
+  %call = call i32 @clock() nounwind
+  %call2 = add i32 %call, 7
+  ret i32 %call2
+}
+
+; CHECK: bl clock
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/PowerPC/can-lower-ret.ll b/test/CodeGen/PowerPC/can-lower-ret.ll
new file mode 100644
index 0000000..acf4104
--- /dev/null
+++ b/test/CodeGen/PowerPC/can-lower-ret.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=ppc64
+
+define <4 x float> @foo1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  %1 = shufflevector <2 x float> %b, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <4 x float> %0, <4 x float> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x double> @foo2(<2 x double> %a, <2 x double> %b) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  %1 = shufflevector <2 x double> %b, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+  ret <4 x double> %2
+}
+
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index ab493a0..1d365d4 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,11 +1,11 @@
 ; Make sure this testcase does not use ctpop
 ; RUN: llc < %s -march=ppc32 | grep -i cntlzw
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @bar(i32 %x) {
 entry:
-        %tmp.1 = call i32 @llvm.cttz.i32( i32 %x )              ; <i32> [#uses=1]
+        %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true )              ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
 
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
new file mode 100644
index 0000000..e161cb0
--- /dev/null
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
+; CHECK: main:
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+  %add = add nsw i32 %argc, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 1, i32 14, metadata !5, null}
+!18 = metadata !{i32 1, i32 26, metadata !5, null}
+!19 = metadata !{i32 2, i32 3, metadata !20, null}
+!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
deleted file mode 100644
index 9e50b55..0000000
--- a/test/CodeGen/PowerPC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PowerPC] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 29c620e..4b6f88b 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -17,10 +17,22 @@ entry:
 bb2:                                              ; preds = %entry, %bb3
   %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
 ; PIC: mtctr
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
 ; PIC-NEXT: bctr
 ; STATIC: mtctr
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
 ; STATIC-NEXT: bctr
 ; PPC64: mtctr
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
 ; PPC64-NEXT: bctr
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
@@ -47,8 +59,8 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
 ; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb)
+; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
 ; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]]
 ; PIC: stw r[[R2]]
 ; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..4019eca
--- /dev/null
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
deleted file mode 100644
index 6042991..0000000
--- a/test/CodeGen/PowerPC/ppc32-vaarg.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-;ModuleID = 'test.c'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-freebsd9.0"
-
-%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
-
-@var1 = common global i64 0, align 8
-@var2 = common global double 0.0, align 8
-@var3 = common global i32 0, align 4
-
-define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
- entry:
-  %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6
-; CHECK:      lbz 4, 0(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
-; CHECK-NEXT: cmplwi 0, 6, 0
-; CHECK-NEXT: addi 6, 4, 1
-; CHECK-NEXT: stw 3, -4(1)
-; CHECK-NEXT: stw 6, -8(1)
-; CHECK-NEXT: stw 4, -12(1)
-; CHECK-NEXT: stw 5, -16(1)
-; CHECK-NEXT: bne 0, .LBB0_2
-; CHECK-NEXT: # BB#1:                                 # %entry
-; CHECK-NEXT: lwz 3, -12(1)
-; CHECK-NEXT: stw 3, -8(1)
-; CHECK-NEXT: .LBB0_2:                                # %entry
-; CHECK-NEXT: lwz 3, -8(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: lwz 5, 8(4)
-; CHECK-NEXT: slwi 6, 3, 2
-; CHECK-NEXT: addi 7, 3, 2
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: addi 8, 3, 4
-; CHECK-NEXT: add 5, 5, 6
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -20(1)
-; CHECK-NEXT: stw 5, -24(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: stw 7, -32(1)
-; CHECK-NEXT: stw 8, -36(1)
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # BB#3:                                 # %entry
-; CHECK-NEXT: lwz 3, -36(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: .LBB0_4:                                # %entry
-; CHECK-NEXT: lwz 3, -28(1)
-; CHECK-NEXT: lwz 4, -32(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -24(1)
-; CHECK-NEXT: lwz 0, -20(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 3, -40(1)
-; CHECK-NEXT: stw 4, -44(1)
-; CHECK-NEXT: blt 0, .LBB0_6
-; CHECK-NEXT: # BB#5:                                 # %entry
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: stw 3, -44(1)
-; CHECK-NEXT: .LBB0_6:                                # %entry
-; CHECK-NEXT: lwz 3, -44(1)
-; CHECK-NEXT: lwz 4, -40(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-  store i64 %x, i64* @var1, align 8
-; CHECK-NEXT: lis 4, var1@ha
-; CHECK-NEXT: lwz 6, 4(3)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: la 7, var1@l(4)
-; CHECK-NEXT: stw 3, var1@l(4)
-; CHECK-NEXT: stw 6, 4(7)
-  %y = va_arg %struct.__va_list_tag* %ap, double; From f1
-; CHECK-NEXT: lbz 3, 1(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 3
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: addi 7, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 8
-; CHECK-NEXT: addi 6, 6, 32
-; CHECK-NEXT: mr 8, 4
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -48(1)
-; CHECK-NEXT: stw 4, -52(1)
-; CHECK-NEXT: stw 6, -56(1)
-; CHECK-NEXT: stw 7, -60(1)
-; CHECK-NEXT: stw 3, -64(1)
-; CHECK-NEXT: stw 8, -68(1)
-; CHECK-NEXT: blt 0, .LBB0_8
-; CHECK-NEXT: # BB#7:                                 # %entry
-; CHECK-NEXT: lwz 3, -64(1)
-; CHECK-NEXT: stw 3, -68(1)
-; CHECK-NEXT: .LBB0_8:                                # %entry
-; CHECK-NEXT: lwz 3, -68(1)
-; CHECK-NEXT: lwz 4, -60(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 1(5)
-; CHECK-NEXT: lwz 4, -56(1)
-; CHECK-NEXT: lwz 0, -48(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -72(1)
-; CHECK-NEXT: stw 3, -76(1)
-; CHECK-NEXT: blt 0, .LBB0_10
-; CHECK-NEXT: # BB#9:                                 # %entry
-; CHECK-NEXT: lwz 3, -52(1)
-; CHECK-NEXT: stw 3, -72(1)
-; CHECK-NEXT: .LBB0_10:                               # %entry
-; CHECK-NEXT: lwz 3, -72(1)
-; CHECK-NEXT: lwz 4, -76(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lfd 0, 0(3)
-  store double %y, double* @var2, align 8
-; CHECK-NEXT: lis 3, var2@ha
-; CHECK-NEXT: stfd 0, var2@l(3)
-  %z = va_arg %struct.__va_list_tag* %ap, i32; From r7
-; CHECK-NEXT: lbz 3, 0(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 2
-; CHECK-NEXT: addi 8, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 4
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: stw 6, -80(1)
-; CHECK-NEXT: stw 8, -84(1)
-; CHECK-NEXT: stw 3, -88(1)
-; CHECK-NEXT: stw 4, -92(1)
-; CHECK-NEXT: stw 7, -96(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -100(1)
-; CHECK-NEXT: blt 0, .LBB0_12
-; CHECK-NEXT: # BB#11:                                # %entry
-; CHECK-NEXT: lwz 3, -88(1)
-; CHECK-NEXT: stw 3, -96(1)
-; CHECK-NEXT: .LBB0_12:                               # %entry
-; CHECK-NEXT: lwz 3, -96(1)
-; CHECK-NEXT: lwz 4, -84(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -80(1)
-; CHECK-NEXT: lwz 0, -100(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -104(1)
-; CHECK-NEXT: stw 3, -108(1)
-; CHECK-NEXT: blt 0, .LBB0_14
-; CHECK-NEXT: # BB#13:                                # %entry
-; CHECK-NEXT: lwz 3, -92(1)
-; CHECK-NEXT: stw 3, -104(1)
-; CHECK-NEXT: .LBB0_14:                               # %entry
-; CHECK-NEXT: lwz 3, -104(1)
-; CHECK-NEXT: lwz 4, -108(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lwz 3, 0(3)
-  store i32 %z, i32* @var3, align 4
-; CHECK-NEXT: lis 4, var3@ha
-; CHECK-NEXT: stw 3, var3@l(4)
-  ret void
-; CHECK-NEXT: stw 5, -112(1)
-; CHECK-NEXT: blr
-}
-
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
new file mode 100644
index 0000000..1fad2fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+  %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+  %a.real = load double* %a.realp
+  %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+  %a.imag = load double* %a.imagp
+  %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+  %b.real = load double* %b.realp
+  %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+  %b.imag = load double* %b.imagp
+  %mul.rl = fmul double %a.real, %b.real
+  %mul.rr = fmul double %a.imag, %b.imag
+  %mul.r = fsub double %mul.rl, %mul.rr
+  %mul.il = fmul double %a.imag, %b.real
+  %mul.ir = fmul double %a.real, %b.imag
+  %mul.i = fadd double %mul.il, %mul.ir
+  %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+  %c.real = load double* %c.realp
+  %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+  %c.imag = load double* %c.imagp
+  %add.r = fadd double %mul.r, %c.real
+  %add.i = fadd double %mul.i, %c.imag
+  %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+  %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+  store double %add.r, double* %real
+  store double %add.i, double* %imag
+  ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
new file mode 100644
index 0000000..1274173
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s -check-prefix=BE-CHK
+
+define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
+entry:
+  fence acquire
+  %cond = icmp eq i32 %a, %b
+  br i1 %cond, label %IfEqual, label %IfUnequal
+
+IfEqual:
+  fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+  br label %end
+
+IfUnequal:
+  fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+  ret i32 0
+
+end:
+  ret i32 1
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll
new file mode 100644
index 0000000..d5c4d46
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-ind-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+  %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10)
+  unreachable
+}
+
+; CHECK: @test1
+; CHECK: ld 11, 0(3)
+; CHECK: ld 2, 8(3)
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
new file mode 100644
index 0000000..e5aa1f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+
+; CHECK:      .section	.opd,"aw",@progbits
+; CHECK-NEXT: test1:
+; CHECK-NEXT:	.align 3
+; CHECK-NEXT:	.quad .L.test1
+; CHECK-NEXT:	.quad .TOC.@tocbase
+; CHECK-NEXT:	.text
+; CHECK-NEXT: .L.test1:
+
+define i32 @test1(i32 %a) nounwind {
+entry:
+  ret i32 %a
+}
+
+; Until recently, binutils accepted the .size directive as:
+;  .size	test1, .Ltmp0-test1
+; however, using this directive with recent binutils will result in the error:
+;  .size expression for XXX does not evaluate to a constant
+; so we must use the label which actually tags the start of the function.
+; CHECK: .size	test1, .Ltmp0-.L.test1
diff --git a/test/CodeGen/PowerPC/ppc64-prefetch.ll b/test/CodeGen/PowerPC/ppc64-prefetch.ll
new file mode 100644
index 0000000..b2f3709
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-prefetch.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define void @test1(i8* %a, ...) nounwind {
+entry:
+  call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; CHECK: @test1
+; CHECK: dcbt
+
diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
new file mode 100644
index 0000000..5a63b01
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
@@ -0,0 +1,20 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i32 @intvaarg(i32 %a, ...) nounwind {
+entry:
+  %va = alloca i8*, align 8
+  %va1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %va1)
+  %0 = va_arg i8** %va, i32
+  %sub = sub nsw i32 %a, %0
+  ret i32 %sub
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+; CHECK: @intvaarg
+; Make sure that the va pointer is incremented by 8 (not 4).
+; CHECK: addi{{.*}}, 8
+
diff --git a/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
new file mode 100644
index 0000000..aa7de16
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=sparc <%s
+
+define void @foo(i32 %a) nounwind {
+entry:
+  br i1 undef, label %return, label %else.0
+
+else.0:
+  br i1 undef, label %if.end.0, label %return
+
+if.end.0:
+  br i1 undef, label %if.then.1, label %else.1
+
+else.1:
+  %0 = bitcast i8* undef to i8**
+  br label %else.1.2
+
+if.then.1:
+  br i1 undef, label %return, label %return
+
+else.1.2:
+  br i1 undef, label %return, label %return
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/SPARC/dg.exp b/test/CodeGen/SPARC/dg.exp
deleted file mode 100644
index 6c0a997..0000000
--- a/test/CodeGen/SPARC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Sparc] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
new file mode 100644
index 0000000..786fee9
--- /dev/null
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Sparc' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
deleted file mode 100644
index 6f3cbac..0000000
--- a/test/CodeGen/SystemZ/00-RetVoid.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define void @foo() {
-entry:
-    ret void
-}
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
deleted file mode 100644
index 8e1ff49..0000000
--- a/test/CodeGen/SystemZ/01-RetArg.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    ret i64 %b
-}
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
deleted file mode 100644
index 8b99e68..0000000
--- a/test/CodeGen/SystemZ/01-RetImm.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi  | count 1
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi  | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 1
-; RUN: llc < %s -march=systemz | grep llihf | count 1
-
-
-define i64 @foo1() {
-entry:
-    ret i64 1
-}
-
-define i64 @foo2() {
-entry:
-    ret i64 65535 
-}
-
-define i64 @foo3() {
-entry:
-    ret i64 131072
-}
-
-define i64 @foo4() {
-entry:
-    ret i64 8589934592
-}
-
-define i64 @foo5() {
-entry:
-    ret i64 562949953421312
-}
-
-define i64 @foo6() {
-entry:
-    ret i64 65537
-}
-
-define i64 @foo7() {
-entry:
-    ret i64 4294967295
-}
-
-define i64 @foo8() {
-entry:
-    ret i64 281483566645248
-}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
deleted file mode 100644
index ee9e5e9..0000000
--- a/test/CodeGen/SystemZ/02-MemArith.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc < %s -march=systemz | FileCheck %s
-
-define signext i32 @foo1(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo1:
-; CHECK:  a %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = add i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo2(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo2:
-; CHECK:  ay %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = add i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo3(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo3:
-; CHECK:  ag %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = add i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo4(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo4:
-; CHECK:  n %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = and i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo5(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo5:
-; CHECK:  ny %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = and i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo6(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo6:
-; CHECK:  ng %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = and i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo7(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo7:
-; CHECK:  o %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = or i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo8(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo8:
-; CHECK:  oy %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = or i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo9(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo9:
-; CHECK:  og %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = or i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo10(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo10:
-; CHECK:  x %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = xor i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo11(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo11:
-; CHECK:  xy %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = xor i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo12(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo12:
-; CHECK:  xg %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = xor i64 %a, %c
-    ret i64 %d
-}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
deleted file mode 100644
index d5dfa22..0000000
--- a/test/CodeGen/SystemZ/02-RetAdd.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = add i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
deleted file mode 100644
index 40f6cce..0000000
--- a/test/CodeGen/SystemZ/02-RetAddImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = add i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
deleted file mode 100644
index b568a57..0000000
--- a/test/CodeGen/SystemZ/02-RetAnd.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
deleted file mode 100644
index 53c5e54..0000000
--- a/test/CodeGen/SystemZ/02-RetAndImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr   | count 4
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 1
-    ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 131072
-    ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 8589934592
-    ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 562949953421312
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
deleted file mode 100644
index 3f6ba2f..0000000
--- a/test/CodeGen/SystemZ/02-RetNeg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcgr | count 1
-
-define i64 @foo(i64 %a) {
-entry:
-    %c = sub i64 0, %a
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
deleted file mode 100644
index a1ddb63..0000000
--- a/test/CodeGen/SystemZ/02-RetOr.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
deleted file mode 100644
index 68cd24d..0000000
--- a/test/CodeGen/SystemZ/02-RetOrImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill | count 1
-; RUN: llc < %s -march=systemz | grep oilh | count 1
-; RUN: llc < %s -march=systemz | grep oihl | count 1
-; RUN: llc < %s -march=systemz | grep oihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 1
-    ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 131072
-    ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 8589934592
-    ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 562949953421312
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
deleted file mode 100644
index 98e1861..0000000
--- a/test/CodeGen/SystemZ/02-RetSub.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = sub i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
deleted file mode 100644
index 8479fbf..0000000
--- a/test/CodeGen/SystemZ/02-RetSubImm.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = sub i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
deleted file mode 100644
index 4d1adf2..0000000
--- a/test/CodeGen/SystemZ/02-RetXor.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = xor i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
deleted file mode 100644
index 473bbf7..0000000
--- a/test/CodeGen/SystemZ/02-RetXorImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = xor i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
deleted file mode 100644
index 0a7f5ee..0000000
--- a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi   | count 3
-; RUN: llc < %s -march=systemz | grep afi   | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
deleted file mode 100644
index 337bb3f..0000000
--- a/test/CodeGen/SystemZ/03-RetAddSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ar    | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 2
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
deleted file mode 100644
index c5326ab..0000000
--- a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
deleted file mode 100644
index 75dc90a..0000000
--- a/test/CodeGen/SystemZ/03-RetAndSubreg.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr | count 3
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
deleted file mode 100644
index 476821a..0000000
--- a/test/CodeGen/SystemZ/03-RetArgSubreg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lgr   | count 2
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    ret i32 %b
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    ret i32 %b
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
deleted file mode 100644
index 70da913..0000000
--- a/test/CodeGen/SystemZ/03-RetImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi  | count 2
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi  | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 2
-
-
-define i32 @foo1() {
-entry:
-    ret i32 1
-}
-
-define i32 @foo2() {
-entry:
-    ret i32 65535 
-}
-
-define i32 @foo3() {
-entry:
-    ret i32 131072
-}
-
-define i32 @foo4() {
-entry:
-    ret i32 65537
-}
-
-define i32 @foo5() {
-entry:
-    ret i32 4294967295
-}
-
-define zeroext i32 @foo6()  {
-entry:
-    ret i32 4294967295
-}
-
-define signext i32 @foo7()  {
-entry:
-    ret i32 4294967295
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
deleted file mode 100644
index 87ebcc1..0000000
--- a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcr | count 1
-
-define i32 @foo(i32 %a) {
-entry:
-    %c = sub i32 0, %a
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
deleted file mode 100644
index 99adea8..0000000
--- a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill  | count 3
-; RUN: llc < %s -march=systemz | grep oilh  | count 3
-; RUN: llc < %s -march=systemz | grep oilf  | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
deleted file mode 100644
index 7dab5ca..0000000
--- a/test/CodeGen/SystemZ/03-RetOrSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ogr   | count 3
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
deleted file mode 100644
index 21ea9b5..0000000
--- a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi   | count 3
-; RUN: llc < %s -march=systemz | grep afi   | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
deleted file mode 100644
index 24b7631..0000000
--- a/test/CodeGen/SystemZ/03-RetSubSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sr    | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 2
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
deleted file mode 100644
index 70ee454..0000000
--- a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xilf  | count 9
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
deleted file mode 100644
index 02c4a2a..0000000
--- a/test/CodeGen/SystemZ/03-RetXorSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xgr   | count 3
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
deleted file mode 100644
index cccdc47..0000000
--- a/test/CodeGen/SystemZ/04-RetShifts.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sra   | count 6
-; RUN: llc < %s -march=systemz | grep srag  | count 3
-; RUN: llc < %s -march=systemz | grep srl   | count 6
-; RUN: llc < %s -march=systemz | grep srlg  | count 3
-; RUN: llc < %s -march=systemz | grep sll   | count 6
-; RUN: llc < %s -march=systemz | grep sllg  | count 3
-
-define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = ashr i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = shl i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = lshr i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = ashr i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = shl i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = lshr i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = ashr i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = shl i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = lshr i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = ashr i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = shl i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = lshr i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = ashr i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = shl i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = lshr i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = ashr i64 %a, %idx
-        ret i64 %shr
-}
-
-define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = shl i64 %a, %idx
-        ret i64 %shr
-}
-
-define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = lshr i64 %a, %idx
-        ret i64 %shr
-}
-
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
deleted file mode 100644
index cf02642..0000000
--- a/test/CodeGen/SystemZ/05-LoadAddr.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep lay | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
deleted file mode 100644
index 3cf21cc..0000000
--- a/test/CodeGen/SystemZ/05-MemImmStores.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhi  | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
-; RUN: llc < %s | grep mvi   | count 2
-; RUN: llc < %s | grep mviy  | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i64* %a, i64 1		; <i64*> [#uses=1]
-	store i64 1, i64* %add.ptr
-	ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %a, i64 1		; <i32*> [#uses=1]
-	store i32 2, i32* %add.ptr
-	ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %a, i64 1		; <i16*> [#uses=1]
-	store i16 3, i16* %add.ptr
-	ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i8* %a, i64 1		; <i8*> [#uses=1]
-	store i8 4, i8* %add.ptr
-	ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
-entry:
-        %add.ptr = getelementptr i8* %a, i64 -1         ; <i8*> [#uses=1]
-        store i8 4, i8* %add.ptr
-        ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
-entry:
-        %add.ptr = getelementptr i16* %a, i64 -1         ; <i16*> [#uses=1]
-        store i16 3, i16* %add.ptr
-        ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
deleted file mode 100644
index eabeb0a..0000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: llc < %s | grep ly     | count 2
-; RUN: llc < %s | grep sty    | count 2
-; RUN: llc < %s | grep {l	%}  | count 2
-; RUN: llc < %s | grep {st	%} | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i32* %foo		; <i32> [#uses=1]
-	store i32 %tmp1, i32* %bar
-	ret void
-}
-
-define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %foo, i64 1		; <i32*> [#uses=1]
-	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr5
-	ret void
-}
-
-define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i32* %foo, i64 -1		; <i32*> [#uses=1]
-	%tmp1 = load i32* %sub.ptr		; <i32> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr
-	ret void
-}
-
-define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %foo, i64 8192		; <i32*> [#uses=1]
-	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr5
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
deleted file mode 100644
index 53bb641..0000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc < %s | grep {sthy.%} | count 2
-; RUN: llc < %s | grep {lhy.%}  | count 2
-; RUN: llc < %s | grep {lh.%}   | count 6
-; RUN: llc < %s | grep {sth.%}  | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i16* %foo		; <i16> [#uses=1]
-	store i16 %tmp1, i16* %bar
-	ret void
-}
-
-define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr5
-	ret void
-}
-
-define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr
-	ret void
-}
-
-define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr5
-	ret void
-}
-
-define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i16* %foo		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	store i32 %conv, i32* %bar
-	ret void
-}
-
-define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr5
-	ret void
-}
-
-define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr
-	ret void
-}
-
-define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr5
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
deleted file mode 100644
index f690a48..0000000
--- a/test/CodeGen/SystemZ/05-MemRegLoads.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc < %s -march=systemz | not grep aghi
-; RUN: llc < %s -march=systemz | grep llgf | count 1
-; RUN: llc < %s -march=systemz | grep llgh | count 1
-; RUN: llc < %s -march=systemz | grep llgc | count 1
-; RUN: llc < %s -march=systemz | grep lgf  | count 2
-; RUN: llc < %s -march=systemz | grep lgh  | count 2
-; RUN: llc < %s -march=systemz | grep lgb  | count 1
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
-	ret i64 %tmp3
-}
-
-define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
-	ret i32 %tmp3
-}
-
-define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
-	ret i16 %tmp3
-}
-
-define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
-	ret i8 %tmp3
-}
-
-define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
-	ret i64 %tmp3
-}
-
-define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
-	ret i32 %tmp3
-}
-
-define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
-	ret i16 %tmp3
-}
-
-define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
-	ret i8 %tmp3
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
deleted file mode 100644
index b851c3f..0000000
--- a/test/CodeGen/SystemZ/05-MemRegStores.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s | not grep aghi
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-
-; CHECK: foo1:
-; CHECK:   stg %r4, 8(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	store i64 %val, i64* %add.ptr2
-	ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
-entry:
-; CHECK: foo2:
-; CHECK:   st %r4, 4(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	store i32 %val, i32* %add.ptr2
-	ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
-entry:
-; CHECK: foo3:
-; CHECK: sth     %r4, 2(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	store i16 %val, i16* %add.ptr2
-	ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
-entry:
-; CHECK: foo4:
-; CHECK: stc     %r4, 1(%r3,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	store i8 %val, i8* %add.ptr2
-	ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo5:
-; CHECK: stc     %r4, 1(%r3,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%conv = trunc i64 %val to i8		; <i8> [#uses=1]
-	store i8 %conv, i8* %add.ptr2
-	ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo6:
-; CHECK: sth     %r4, 2(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%conv = trunc i64 %val to i16		; <i16> [#uses=1]
-	store i16 %conv, i16* %add.ptr2
-	ret void
-}
-
-define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo7:
-; CHECK: st      %r4, 4(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%conv = trunc i64 %val to i32		; <i32> [#uses=1]
-	store i32 %conv, i32* %add.ptr2
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
deleted file mode 100644
index e904f49..0000000
--- a/test/CodeGen/SystemZ/06-CallViaStack.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s | grep 168 | count 1
-; RUN: llc < %s | grep 160 | count 3
-; RUN: llc < %s | grep 328 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
-entry:
-	%a = alloca i64, align 8		; <i64*> [#uses=3]
-	store i64 %g, i64* %a
-	call void @bar(i64* %a) nounwind
-	%tmp1 = load i64* %a		; <i64> [#uses=1]
-	ret i64 %tmp1
-}
-
-declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
deleted file mode 100644
index c71da9b..0000000
--- a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
-        ret i64 %f
-}
-
-define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
-	%conv = ptrtoint i64* %g to i64		; <i64> [#uses=1]
-	ret i64 %conv
-}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
deleted file mode 100644
index d89b0df..0000000
--- a/test/CodeGen/SystemZ/06-LocalFrame.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 328 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
-entry:
-	%g = alloca i64, align 8		; <i64*> [#uses=1]
-	%add.ptr = getelementptr i64* %g, i64 %f		; <i64*> [#uses=1]
-	ret i64* %add.ptr
-}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
deleted file mode 100644
index fd4b5029..0000000
--- a/test/CodeGen/SystemZ/06-SimpleCall.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo() nounwind {
-entry:
-	tail call void @bar() nounwind
-	ret void
-}
-
-declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
deleted file mode 100644
index 8599717..0000000
--- a/test/CodeGen/SystemZ/07-BrCond.ll
+++ /dev/null
@@ -1,141 +0,0 @@
-; RUN: llc < %s | grep je  | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh  | count 4
-; RUN: llc < %s | grep jl  | count 4
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
deleted file mode 100644
index 8ece5ac..0000000
--- a/test/CodeGen/SystemZ/07-BrCond32.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; RUN: llc < %s | grep je  | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh  | count 4
-; RUN: llc < %s | grep jl  | count 4
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
deleted file mode 100644
index ac6067a..0000000
--- a/test/CodeGen/SystemZ/07-BrUnCond.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-
-define void @foo() noreturn nounwind {
-entry:
-	tail call void @baz() nounwind
-	br label %l1
-
-l1:		; preds = %entry, %l1
-	tail call void @bar() nounwind
-	br label %l1
-}
-
-declare void @bar()
-
-declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
deleted file mode 100644
index 4d0ebda..0000000
--- a/test/CodeGen/SystemZ/07-CmpImm.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: llc < %s | grep cgfi | count 8
-; RUN: llc < %s | grep clgfi | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i64 %a) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i64 %a) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i64 %a) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i64 %a) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
deleted file mode 100644
index add34fa..0000000
--- a/test/CodeGen/SystemZ/07-CmpImm32.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; RUN: llc < %s | grep jl  | count 3
-; RUN: llc < %s | grep jh  | count 3
-; RUN: llc < %s | grep je  | count 2
-; RUN: llc < %s | grep jne | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i32 %a) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i32 %a) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i32 %a) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i32 %a) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
deleted file mode 100644
index aa4b36e..0000000
--- a/test/CodeGen/SystemZ/07-SelectCC.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep clgr
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	%cond = select i1 %cmp, i64 %a, i64 %b		; <i64> [#uses=1]
-	ret i64 %cond
-}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
deleted file mode 100644
index ff1e441..0000000
--- a/test/CodeGen/SystemZ/08-DivRem.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s | grep dsgr  | count 2
-; RUN: llc < %s | grep dsgfr | count 2
-; RUN: llc < %s | grep dlr   | count 2
-; RUN: llc < %s | grep dlgr  | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%div = sdiv i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %div
-}
-
-define i32 @div1(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%div = sdiv i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %div
-}
-
-define i64 @div2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%div = udiv i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %div
-}
-
-define i32 @div3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%div = udiv i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %div
-}
-
-define i64 @rem(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%rem = srem i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %rem
-}
-
-define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%rem = srem i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %rem
-}
-
-define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%rem = urem i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %rem
-}
-
-define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%rem = urem i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %rem
-}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
deleted file mode 100644
index d6ec0e7..0000000
--- a/test/CodeGen/SystemZ/08-DivRemMemOp.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: llc < %s | grep {dsgf.%} | count 2
-; RUN: llc < %s | grep {dsg.%}  | count 2
-; RUN: llc < %s | grep {dl.%}   | count 2
-; RUN: llc < %s | grep dlg      | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64* %b) nounwind readnone {
-entry:
-	%b1 = load i64* %b
-	%div = sdiv i64 %a, %b1
-	ret i64 %div
-}
-
-define i64 @div1(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = udiv i64 %a, %b1
-        ret i64 %div
-}
-
-define i64 @rem(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = srem i64 %a, %b1
-        ret i64 %div
-}
-
-define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = urem i64 %a, %b1
-        ret i64 %div
-}
-
-define i32 @div2(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = sdiv i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @div3(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = udiv i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = srem i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = urem i32 %a, %b1
-        ret i32 %div
-}
-
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
deleted file mode 100644
index 1ab88d6..0000000
--- a/test/CodeGen/SystemZ/08-SimpleMuls.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s | grep msgr | count 2
-; RUN: llc < %s | grep msr  | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%mul = mul i64 %b, %a		; <i64> [#uses=1]
-	ret i64 %mul
-}
-
-define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%mul = mul i64 %b, %a		; <i64> [#uses=1]
-	ret i64 %mul
-}
-
-define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%mul = mul i32 %b, %a		; <i32> [#uses=1]
-	ret i32 %mul
-}
-
-define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%mul = mul i32 %b, %a		; <i32> [#uses=1]
-	ret i32 %mul
-}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
deleted file mode 100644
index 30810ce..0000000
--- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo(i64 %N) nounwind {
-entry:
-	%N3 = trunc i64 %N to i32		; <i32> [#uses=1]
-	%vla = alloca i8, i32 %N3, align 2		; <i8*> [#uses=1]
-	call void @bar(i8* %vla) nounwind
-	ret void
-}
-
-declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
deleted file mode 100644
index 50a26e2..0000000
--- a/test/CodeGen/SystemZ/09-Globals.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s | grep larl | count 3
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-@bar = common global i64 0, align 8		; <i64*> [#uses=3]
-
-define i64 @foo() nounwind readonly {
-entry:
-	%tmp = load i64* @bar		; <i64> [#uses=1]
-	ret i64 %tmp
-}
-
-define i64* @foo2() nounwind readnone {
-entry:
-	ret i64* @bar
-}
-
-define i64* @foo3(i64 %idx) nounwind readnone {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
deleted file mode 100644
index 32aaa62..0000000
--- a/test/CodeGen/SystemZ/09-Switches.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=systemz | grep larl
-
-define i32 @main(i32 %tmp158) {
-entry:
-        switch i32 %tmp158, label %bb336 [
-		 i32 -2147483648, label %bb338
-		 i32 -2147483647, label %bb338
-		 i32 -2147483646, label %bb338
-		 i32 120, label %bb338
-		 i32 121, label %bb339
-		 i32 122, label %bb340
-                 i32 123, label %bb341
-                 i32 124, label %bb342
-                 i32 125, label %bb343
-                 i32 126, label %bb336
-		 i32 1024, label %bb338
-                 i32 0, label %bb338
-                 i32 1, label %bb338
-                 i32 2, label %bb338
-                 i32 3, label %bb338
-                 i32 4, label %bb338
-		 i32 5, label %bb338
-        ]
-bb336:
-  ret i32 10
-bb338:
-  ret i32 11
-bb339:
-  ret i32 12
-bb340:
-  ret i32 13
-bb341:
-  ret i32 14
-bb342:
-  ret i32 15
-bb343:
-  ret i32 18
-
-}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
deleted file mode 100644
index f291e5f..0000000
--- a/test/CodeGen/SystemZ/10-FuncsPic.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
-; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@ptr = external global void (...)*		; <void (...)**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
-	store void (...)* @func, void (...)** @ptr
-	ret void
-}
-
-declare void @func(...)
-
-define void @foo2() nounwind {
-entry:
-	tail call void (...)* @func() nounwind
-	ret void
-}
-
-define void @foo3() nounwind {
-entry:
-	%tmp = load void (...)** @ptr		; <void (...)*> [#uses=1]
-	tail call void (...)* %tmp() nounwind
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
deleted file mode 100644
index c581ad9..0000000
--- a/test/CodeGen/SystemZ/10-GlobalsPic.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@src = external global i32		; <i32*> [#uses=2]
-@dst = external global i32		; <i32*> [#uses=2]
-@ptr = external global i32*		; <i32**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
-	%tmp = load i32* @src		; <i32> [#uses=1]
-	store i32 %tmp, i32* @dst
-	ret void
-}
-
-define void @foo2() nounwind {
-entry:
-	store i32* @dst, i32** @ptr
-	ret void
-}
-
-define void @foo3() nounwind {
-entry:
-	%tmp = load i32* @src		; <i32> [#uses=1]
-	%tmp1 = load i32** @ptr		; <i32*> [#uses=1]
-	%arrayidx = getelementptr i32* %tmp1, i64 1		; <i32*> [#uses=1]
-	store i32 %tmp, i32* %arrayidx
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
deleted file mode 100644
index 1aa9c67..0000000
--- a/test/CodeGen/SystemZ/11-BSwap.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-
-define zeroext i16 @foo(i16 zeroext %a)  {
-	%res = tail call i16 @llvm.bswap.i16(i16 %a)
-	ret i16 %res
-}
-
-define zeroext i32 @foo2(i32 zeroext %a)  {
-; CHECK: foo2:
-; CHECK:  lrvr [[R1:%r.]], %r2
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        ret i32 %res
-}
-
-define zeroext i64 @foo3(i64 %a)  {
-; CHECK: foo3:
-; CHECK:  lrvgr %r2, %r2
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        ret i64 %res
-}
-
-define zeroext i16 @foo4(i16* %b)  {
-	%a = load i16* %b
-        %res = tail call i16 @llvm.bswap.i16(i16 %a)
-        ret i16 %res
-}
-
-define zeroext i32 @foo5(i32* %b)  {
-; CHECK: foo5:
-; CHECK:  lrv [[R1:%r.]], 0(%r2)
-	%a = load i32* %b
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        ret i32 %res
-}
-
-define i64 @foo6(i64* %b) {
-; CHECK: foo6:
-; CHECK:  lrvg %r2, 0(%r2)
-	%a = load i64* %b
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        ret i64 %res
-}
-
-define void @foo7(i16 %a, i16* %b) {
-        %res = tail call i16 @llvm.bswap.i16(i16 %a)
-        store i16 %res, i16* %b
-        ret void
-}
-
-define void @foo8(i32 %a, i32* %b) {
-; CHECK: foo8:
-; CHECK:  strv %r2, 0(%r3)
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        store i32 %res, i32* %b
-        ret void
-}
-
-define void @foo9(i64 %a, i64* %b) {
-; CHECK: foo9:
-; CHECK:  strvg %r2, 0(%r3)
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        store i64 %res, i64* %b
-        ret void
-}
-
-declare i16 @llvm.bswap.i16(i16) nounwind readnone
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
deleted file mode 100644
index 65f8e14..0000000
--- a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i32 @main() nounwind {
-entry:
-	%call = call i32 (...)* @random() nounwind		; <i32> [#uses=0]
-	unreachable
-}
-
-declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
deleted file mode 100644
index 3cfa97d..0000000
--- a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=systemz | grep nilf | count 1
-; RUN: llc < %s -march=systemz | grep nill | count 1
-
-define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
-entry:
-        %shr = lshr i64 %__dev, 8               ; <i64> [#uses=1]
-        %shr8 = trunc i64 %shr to i32           ; <i32> [#uses=1]
-        %shr2 = lshr i64 %__dev, 32             ; <i64> [#uses=1]
-        %conv = trunc i64 %shr2 to i32          ; <i32> [#uses=1]
-        %and3 = and i32 %conv, -4096            ; <i32> [#uses=1]
-        %and6 = and i32 %shr8, 4095             ; <i32> [#uses=1]
-        %conv5 = or i32 %and6, %and3            ; <i32> [#uses=1]
-        ret i32 %conv5
-}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
deleted file mode 100644
index 54424e1..0000000
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=systemz | grep rll
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
-entry:
-	%shl = shl i32 %x, 1		; <i32> [#uses=1]
-	%sub = sub i32 32, 1		; <i32> [#uses=1]
-	%shr = lshr i32 %x, %sub		; <i32> [#uses=1]
-	%or = or i32 %shr, %shl		; <i32> [#uses=1]
-	ret i32 %or
-}
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
deleted file mode 100644
index 5f6ec50d..0000000
--- a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-	%struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
-	%struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
-
-define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
-entry:
-	%cmp17.i = icmp slt i32 undef, %startpos		; <i1> [#uses=1]
-	%or.cond.i = or i1 undef, %cmp17.i		; <i1> [#uses=1]
-	br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
-
-if.then20.i:		; preds = %entry
-	ret i32 -2
-
-byte_re_search_2.exit:		; preds = %entry
-	ret i32 -1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
deleted file mode 100644
index 89b2225..0000000
--- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
-entry:
-	br label %for.body38
-
-for.body38:		; preds = %for.body38, %entry
-	br i1 undef, label %for.cond220, label %for.body38
-
-for.cond220:		; preds = %for.cond220, %for.body38
-	br i1 false, label %for.cond220, label %for.end297
-
-for.end297:		; preds = %for.cond220
-	%tmp334 = load i8* undef		; <i8> [#uses=1]
-	%conv343 = zext i8 %tmp334 to i32		; <i32> [#uses=1]
-	%sub344 = add i32 %conv343, -1		; <i32> [#uses=1]
-	%shl345 = shl i32 1, %sub344		; <i32> [#uses=1]
-	%conv346 = sext i32 %shl345 to i64		; <i64> [#uses=1]
-	br label %for.body356
-
-for.body356:		; preds = %for.body356, %for.end297
-	%mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ]		; <i64> [#uses=0]
-	br label %for.body356
-}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
deleted file mode 100644
index 68ccb84..0000000
--- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
-entry:
-	br i1 undef, label %for.body, label %return
-
-for.body:		; preds = %entry
-	%add = add i32 0, %col		; <i32> [#uses=1]
-	%sh_prom = zext i32 %add to i64		; <i64> [#uses=1]
-	%shl = shl i64 1, %sh_prom		; <i64> [#uses=1]
-	br i1 undef, label %if.then13, label %if.else
-
-if.then13:		; preds = %for.body
-	ret i32 0
-
-if.else:		; preds = %for.body
-	%or34 = or i64 undef, %shl		; <i64> [#uses=0]
-	ret i32 0
-
-return:		; preds = %entry
-	ret i32 1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
deleted file mode 100644
index 92f5467..0000000
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=basic | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
-
-declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
-
-define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
-; CHECK: lg %r{{[0-9]+}}, 328(%r15)
-
-newFuncRoot:
-	br label %bb3
-
-bb4.exitStub:		; preds = %bb3
-	store double %call, double* %call.out
-	ret void
-
-bb3:		; preds = %newFuncRoot
-	tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
-	%call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft)		; <double> [#uses=1]
-	br label %bb4.exitStub
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
deleted file mode 100644
index f4e176e..0000000
--- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define float @foo(i32 signext %a) {
-entry:
-    %b = bitcast i32 %a to float
-    ret float %b
-}
-
-define i32 @bar(float %a) {
-entry:
-    %b = bitcast float %a to i32
-    ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
deleted file mode 100644
index 63fd855..0000000
--- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @dfg_parse() nounwind {
-entry:
-	br i1 undef, label %if.then2208, label %if.else2360
-
-if.then2208:		; preds = %entry
-	br i1 undef, label %bb.nph3189, label %for.end2270
-
-bb.nph3189:		; preds = %if.then2208
-	unreachable
-
-for.end2270:		; preds = %if.then2208
-	%call2279 = call i64 @strlen(i8* undef) nounwind		; <i64> [#uses=1]
-	%add2281 = add i64 0, %call2279		; <i64> [#uses=1]
-	%tmp2283 = trunc i64 %add2281 to i32		; <i32> [#uses=1]
-	%tmp2284 = alloca i8, i32 %tmp2283, align 2		; <i8*> [#uses=1]
-	%yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13		; <i8*> [#uses=1]
-	store i8 117, i8* %yyd.0.i2561.13
-	br label %while.cond.i2558
-
-while.cond.i2558:		; preds = %while.cond.i2558, %for.end2270
-	br label %while.cond.i2558
-
-if.else2360:		; preds = %entry
-	unreachable
-}
-
-declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
deleted file mode 100644
index f7686f1..0000000
--- a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
-
-define internal void @frame_dummy() nounwind {
-entry:
-  %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
-  %0 = icmp eq void (i8*)* %asmtmp, null          ; <i1> [#uses=1]
-  br i1 %0, label %return, label %bb3
-
-bb3:                                              ; preds = %entry
-  tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
-  ret void
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
deleted file mode 100644
index fde7d9d..0000000
--- a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-define double @foo(double %a, double %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
-  %0 = tail call double @copysign(double %a, double %b) nounwind readnone
-  ret double %0
-}
-
-define float @bar(float %a, float %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
-  %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
-  ret float %0
-}
-
-
-declare double @copysign(double, double) nounwind readnone
-declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
deleted file mode 100644
index d730bec..0000000
--- a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64"
-target triple = "s390x-elf"
-
-@REGISTER = external global [10 x i32]            ; <[10 x i32]*> [#uses=2]
-
-define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind {
-entry:
-  %REG1 = alloca i32, align 4                     ; <i32*> [#uses=2]
-  %REG2 = alloca i32, align 4                     ; <i32*> [#uses=2]
-  %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; <i32> [#uses=0]
-  %tmp = load i32* %REG1                          ; <i32> [#uses=1]
-  %idxprom = sext i32 %tmp to i64                 ; <i64> [#uses=1]
-  %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; <i32*> [#uses=2]
-  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=2]
-  %tmp2 = load i32* %REG2                         ; <i32> [#uses=1]
-  %idxprom3 = sext i32 %tmp2 to i64               ; <i64> [#uses=1]
-  %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; <i32*> [#uses=3]
-  %tmp5 = load i32* %arrayidx4                    ; <i32> [#uses=3]
-  %cmp6 = icmp sgt i32 %tmp5, 8388607             ; <i1> [#uses=1]
-  %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1   ; <i32> [#uses=2]
-  %cmp10 = icmp eq i32 %REG2_SIGN.0, 1            ; <i1> [#uses=1]
-  %not.cmp = icmp slt i32 %tmp1, 8388608          ; <i1> [#uses=2]
-  %or.cond = and i1 %cmp10, %not.cmp              ; <i1> [#uses=1]
-  br i1 %or.cond, label %if.then13, label %if.end25
-
-if.then13:                                        ; preds = %entry
-  %div = sdiv i32 %tmp5, %tmp1                    ; <i32> [#uses=2]
-  store i32 %div, i32* %arrayidx4
-  br label %if.end25
-
-if.end25:                                         ; preds = %if.then13, %entry
-  %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; <i32> [#uses=1]
-  %cmp27 = icmp eq i32 %REG2_SIGN.0, -1           ; <i1> [#uses=1]
-  %or.cond46 = and i1 %cmp27, %not.cmp            ; <i1> [#uses=1]
-  br i1 %or.cond46, label %if.then31, label %if.end45
-
-if.then31:                                        ; preds = %if.end25
-  %sub = sub i32 16777216, %tmp35                 ; <i32> [#uses=1]
-  %tmp39 = load i32* %arrayidx                    ; <i32> [#uses=1]
-  %div40 = udiv i32 %sub, %tmp39                  ; <i32> [#uses=1]
-  %sub41 = sub i32 16777216, %div40               ; <i32> [#uses=1]
-  store i32 %sub41, i32* %arrayidx4
-  ret void
-
-if.end45:                                         ; preds = %if.end25
-  ret void
-}
-
-declare signext i32 @FORMAT2(...)
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index c2877ac..0000000
--- a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
-  ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
deleted file mode 100644
index e9624ba..0000000
--- a/test/CodeGen/SystemZ/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target SystemZ] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index 2890c22..ed55bb5 100644
--- a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -1,11 +1,7 @@
-; DISABLED: llc -mtriple=thumbv6-apple-darwin < %s
-; RUN: false
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s
 ; rdar://problem/9416774
 ; ModuleID = 'reduced.ll'
 
-; byval is currently unsupported.
-; XFAIL: *
-
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-ios"
 
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fbacaba..f8c438c 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
 
 define void @test1() {
 ; CHECK: test1:
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index c2ba208..50a1a07 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -13,9 +13,9 @@ entry:
 
 bb:             ; preds = %bb, %entry
         %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ]              ; <i32> [#uses=2]
-        %tmp = volatile load i8** %va           ; <i8*> [#uses=2]
+        %tmp = load volatile i8** %va           ; <i8*> [#uses=2]
         %tmp2 = getelementptr i8* %tmp, i32 4           ; <i8*> [#uses=1]
-        volatile store i8* %tmp2, i8** %va
+        store volatile i8* %tmp2, i8** %va
         %tmp5 = add i32 %a_addr.0, -1           ; <i32> [#uses=1]
         %tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1         ; <i1> [#uses=1]
         br i1 %tmp.upgrd.2, label %bb7, label %bb
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4e1394f..4616dcf 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp2,+thumb2 | FileCheck %s
 ; rdar://7076238
 
 @"\01LC" = external constant [36 x i8], align 1		; <[36 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 034a28f..524e5a6 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -5,7 +5,7 @@ define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y
 entry:
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
-; CHECK: ldr.64
+; CHECK: vldr
 ; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index bb734ac..fcf1bae 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -21,7 +21,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
 entry:
   %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  volatile store i32 1, i32* %tmp1, align 4
+  store volatile i32 1, i32* %tmp1, align 4
   %tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1
   store i32 %levels, i32* %tmp12, align 4
   %tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3
@@ -46,7 +46,7 @@ entry:
   %tmp24 = shl i32 %flags.0, 16
   %asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind
   %tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0
-  volatile store i32 1, i32* %tmp25, align 4
+  store volatile i32 1, i32* %tmp25, align 4
   %tmp26 = icmp eq i32 %levels, 0
   br i1 %tmp26, label %return, label %bb4
 
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 01fb0a5..06762ba 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,7 +23,7 @@ entry:
   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
 ; Constant pool load followed by add.
 ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: vldr [[LDR:d.*]],
 ; CHECK: LPC0_0:
 ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
 ; CHECK-NOT: vmov.f64 [[ADD]]
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index d2140a1..5cb266b 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
 ; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 %struct.buf = type opaque
 
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
new file mode 100644
index 0000000..dadbdc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 
+
+%struct.LIST_NODE.0.16 = type { %struct.LIST_NODE.0.16*, i8* }
+
+define %struct.LIST_NODE.0.16* @list_AssocListPair(%struct.LIST_NODE.0.16* %List, i8* %Key) nounwind readonly {
+entry:
+  br label %bb3
+
+bb:                                               ; preds = %bb3
+  %Scan.0.idx7.val = load i8** undef, align 4
+  %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+  %0 = bitcast i8* %.idx to i8**
+  %.idx.val = load i8** %0, align 4
+  %1 = icmp eq i8* %.idx.val, %Key
+  br i1 %1, label %bb5, label %bb2
+
+bb2:                                              ; preds = %bb
+  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %entry
+  %Scan.0 = phi %struct.LIST_NODE.0.16* [ %List, %entry ], [ %Scan.0.idx8.val, %bb2 ]
+  %2 = icmp eq %struct.LIST_NODE.0.16* %Scan.0, null
+  br i1 %2, label %bb5, label %bb
+
+bb5:                                              ; preds = %bb3, %bb
+  ret %struct.LIST_NODE.0.16* null
+}
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
new file mode 100644
index 0000000..4acdd9e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; rdar://10676853
+
+%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
+%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
+%union.anon = type { %struct.E_list_struct* }
+%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+
+@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+; CHECK: rdictionary_lookup:
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then10, %entry
+  %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
+  %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+  br i1 %cmp, label %if.end11, label %if.end
+
+if.end:                                           ; preds = %tailrecurse
+  %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+  %0 = load i8** %string, align 4
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.body.i, %if.end
+  %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+  %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+  %2 = load i8* %1, align 1
+  %cmp.i = icmp eq i8 %2, 0
+  %.pre.i = load i8* %storemerge.i, align 1
+  br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
+
+land.end.i:                                       ; preds = %while.cond.i
+  %cmp4.i = icmp eq i8 %2, %.pre.i
+  br i1 %cmp4.i, label %while.body.i, label %while.end.i
+
+while.body.i:                                     ; preds = %land.end.i
+  %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
+  %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+  br label %while.cond.i
+
+while.end.i:                                      ; preds = %land.end.i
+  %cmp8.i = icmp eq i8 %2, 42
+  br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
+
+lor.lhs.false.i:                                  ; preds = %while.end.i, %while.cond.i
+  %3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
+  %cmp11.i = icmp eq i8 %.pre.i, 42
+  br i1 %cmp11.i, label %if.end3, label %dict_match.exit
+
+dict_match.exit:                                  ; preds = %lor.lhs.false.i
+  %cmp14.i = icmp eq i8 %3, 46
+  %conv16.i = sext i8 %3 to i32
+  %.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
+  %cmp18.i = icmp eq i8 %.pre.i, 46
+  %conv22.i = sext i8 %.pre.i to i32
+  %cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
+  %sub.i = sub nsw i32 %.conv16.i, %cond24.i
+  %cmp1 = icmp sgt i32 %sub.i, -1
+  br i1 %cmp1, label %if.end3, label %if.then10
+
+if.end3:                                          ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
+; CHECK: %if.end3
+; CHECK: cmp
+; CHECK-NOT: cbnz
+  %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
+  %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+  %4 = load %struct.Dict_node_struct** %right, align 4
+  tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
+  %cmp4 = icmp eq i32 %storemerge1.i3, 0
+  br i1 %cmp4, label %if.then5, label %if.end8
+
+if.then5:                                         ; preds = %if.end3
+  %call6 = tail call fastcc i8* @xalloc(i32 20)
+  %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
+  %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
+  %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+  %right7 = getelementptr inbounds i8* %call6, i32 16
+  %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
+  store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
+  store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
+  br label %if.then10
+
+if.end8:                                          ; preds = %if.end3
+  %cmp9 = icmp slt i32 %storemerge1.i3, 1
+  br i1 %cmp9, label %if.then10, label %if.end11
+
+if.then10:                                        ; preds = %if.end8, %if.then5, %dict_match.exit
+  %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+  %9 = load %struct.Dict_node_struct** %left, align 4
+  br label %tailrecurse
+
+if.end11:                                         ; preds = %if.end8, %tailrecurse
+  ret void
+}
+
+; Materializable
+declare hidden fastcc i8* @xalloc(i32) nounwind ssp
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
new file mode 100644
index 0000000..16b3a19
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; The double in the constant pool is 8-byte aligned, forcing the function
+; alignment.
+; CHECK: .align 3
+; CHECK: func
+;
+; Constant pool with 8-byte entry before 4-byte entry:
+; CHECK: .align 3
+; CHECK: LCPI
+; CHECK:	.long	2370821947
+; CHECK:	.long	1080815255
+; CHECK: LCPI
+; CHECK:	.long	1123477881
+define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
+entry:
+  %0 = load float* %x, align 4
+  %add = fadd float %0, 0x405EDD2F20000000
+  store float %add, float* %x, align 4
+  %1 = load double* %y, align 4
+  %add1 = fadd double %1, 2.234560e+02
+  store double %add1, double* %y, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
new file mode 100644
index 0000000..c98ca80
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; CHECK: f
+; This function is forced to spill a double.
+; Verify that the spill slot is properly aligned.
+;
+; The caller-saved r4 is used as a scratch register for stack realignment.
+; CHECK: push {r4, r7, lr}
+; CHECK: bic r4, r4, #7
+; CHECK: mov sp, r4
+define void @f(double* nocapture %p) nounwind ssp {
+entry:
+  %0 = load double* %p, align 4
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  tail call void @g() nounwind
+  store double %0, double* %p, align 4
+  ret void
+}
+
+; NEON: f
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #64
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; Stack pointer adjustment for the stack frame contents.
+; This could legally happen before the spills.
+; Since the spill slot is only 8 bytes, technically it would be fine to only
+; subtract #8 here. That would leave sp less aligned than some stack slots,
+; and would probably blow MFI's mind.
+; NEON: sub sp, #16
+; The epilog is free to use another scratch register than r4.
+; NEON: add r[[R4:[0-9]+]], sp, #16
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+declare void @g()
+
+; Spill 7 d-registers.
+define void @f7(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
+  ret void
+}
+
+; NEON: f7
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #56
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vstr d14, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9, d10, d11},
+; NEON: vld1.64 {d12, d13},
+; NEON: vldr d14,
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+; Spill 7 d-registers, leave a hole.
+define void @f3plus4(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  ret void
+}
+
+; Aligned spilling only works for contiguous ranges starting from d8.
+; The rest goes to the standard vpush instructions.
+; NEON: f3plus4
+; NEON: push {r4, r7, lr}
+; NEON: vpush {d12, d13, d14, d15}
+; NEON: sub.w r4, sp, #24
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vstr d10, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9},
+; NEON: vldr d10, [{{.*}}, #16]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: vpop {d12, d13, d14, d15}
+; NEON: pop
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
new file mode 100644
index 0000000..19d2385
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -0,0 +1,1400 @@
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This function comes from the Bullet test.  It is quite big, and exercises the
+; constant island pass a bit.  It has caused failures, including
+; <rdar://problem/10670199>
+;
+; It is unlikely that this code will continue to create the exact conditions
+; that broke the arm constant island pass in the past, but it is still useful to
+; force the pass to split basic blocks etc.
+;
+; The run lines above force the integrated assembler to be enabled so it can
+; catch any illegal displacements.  Other than that, we depend on the constant
+; island pass assertions.
+
+%class.btVector3 = type { [4 x float] }
+%class.btTransform = type { %class.btMatrix3x3, %class.btVector3 }
+%class.btMatrix3x3 = type { [3 x %class.btVector3] }
+%class.btCapsuleShape = type { %class.btConvexInternalShape, i32 }
+%class.btConvexInternalShape = type { %class.btConvexShape, %class.btVector3, %class.btVector3, float, float }
+%class.btConvexShape = type { %class.btCollisionShape }
+%class.btCollisionShape = type { i32 (...)**, i32, i8* }
+%class.RagDoll = type { i32 (...)**, %class.btDynamicsWorld*, [11 x %class.btCollisionShape*], [11 x %class.btRigidBody*], [10 x %class.btTypedConstraint*] }
+%class.btDynamicsWorld = type { %class.btCollisionWorld, void (%class.btDynamicsWorld*, float)*, void (%class.btDynamicsWorld*, float)*, i8*, %struct.btContactSolverInfo }
+%class.btCollisionWorld = type { i32 (...)**, %class.btAlignedObjectArray, %class.btDispatcher*, %struct.btDispatcherInfo, %class.btStackAlloc*, %class.btBroadphaseInterface*, %class.btIDebugDraw*, i8 }
+%class.btAlignedObjectArray = type { %class.btAlignedAllocator, i32, i32, %class.btCollisionObject**, i8 }
+%class.btAlignedAllocator = type { i8 }
+%class.btCollisionObject = type { i32 (...)**, %class.btTransform, %class.btTransform, %class.btVector3, %class.btVector3, %class.btVector3, i8, float, %struct.btBroadphaseProxy*, %class.btCollisionShape*, %class.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %class.btVector3, %class.btVector3 }
+%class.btDispatcher = type { i32 (...)** }
+%struct.btDispatcherInfo = type { float, i32, i32, float, i8, %class.btIDebugDraw*, i8, i8, i8, float, i8, float, %class.btStackAlloc* }
+%class.btIDebugDraw = type { i32 (...)** }
+%class.btStackAlloc = type opaque
+%class.btBroadphaseInterface = type { i32 (...)** }
+%struct.btContactSolverInfo = type { %struct.btContactSolverInfoData }
+%struct.btContactSolverInfoData = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%class.btRigidBody = type { %class.btCollisionObject, %class.btMatrix3x3, %class.btVector3, %class.btVector3, float, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, i8, float, float, float, float, float, float, %class.btMotionState*, %class.btAlignedObjectArray.22, i32, i32, i32 }
+%class.btMotionState = type { i32 (...)** }
+%class.btAlignedObjectArray.22 = type { %class.btAlignedAllocator.23, i32, i32, %class.btTypedConstraint**, i8 }
+%class.btAlignedAllocator.23 = type { i8 }
+%class.btTypedConstraint = type { i32 (...)**, %struct.btTypedObject, i32, i32, i8, %class.btRigidBody*, %class.btRigidBody*, float, float, %class.btVector3, %class.btVector3, %class.btVector3 }
+%struct.btTypedObject = type { i32 }
+%class.btHingeConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, float, float, float, float, float, i8, i8, i8, i8, i8, float }
+%class.btJacobianEntry = type { %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float }
+%class.btConeTwistConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, %class.btVector3, %class.btVector3, float, float, float, float, float, float, float, float, i8, i8, i8, i8, float, float, %class.btVector3, i8, i8, %class.btQuaternion, float, %class.btVector3 }
+%class.btQuaternion = type { %class.btQuadWord }
+%class.btQuadWord = type { [4 x float] }
+
+@_ZTV7RagDoll = external unnamed_addr constant [4 x i8*]
+
+declare noalias i8* @_Znwm(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3*, float*, float*, float*) unnamed_addr inlinehint ssp align 2
+
+declare void @_ZSt9terminatev()
+
+declare %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform*) unnamed_addr ssp align 2
+
+declare void @_ZN11btTransform11setIdentityEv(%class.btTransform*) ssp align 2
+
+declare void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform*, %class.btVector3*) nounwind inlinehint ssp align 2
+
+declare i8* @_ZN13btConvexShapenwEm(i32) inlinehint ssp align 2
+
+declare void @_ZN13btConvexShapedlEPv(i8*) inlinehint ssp align 2
+
+declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, float, float)
+
+declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2
+
+define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 {
+entry:
+  %retval = alloca %class.RagDoll*, align 4
+  %this.addr = alloca %class.RagDoll*, align 4
+  %ownerWorld.addr = alloca %class.btDynamicsWorld*, align 4
+  %positionOffset.addr = alloca %class.btVector3*, align 4
+  %scale.addr = alloca float, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %offset = alloca %class.btTransform, align 4
+  %transform = alloca %class.btTransform, align 4
+  %ref.tmp = alloca %class.btVector3, align 4
+  %ref.tmp97 = alloca %class.btVector3, align 4
+  %ref.tmp98 = alloca float, align 4
+  %ref.tmp99 = alloca float, align 4
+  %ref.tmp100 = alloca float, align 4
+  %ref.tmp102 = alloca %class.btTransform, align 4
+  %ref.tmp107 = alloca %class.btVector3, align 4
+  %ref.tmp108 = alloca %class.btVector3, align 4
+  %ref.tmp109 = alloca float, align 4
+  %ref.tmp110 = alloca float, align 4
+  %ref.tmp111 = alloca float, align 4
+  %ref.tmp113 = alloca %class.btTransform, align 4
+  %ref.tmp119 = alloca %class.btVector3, align 4
+  %ref.tmp120 = alloca %class.btVector3, align 4
+  %ref.tmp121 = alloca float, align 4
+  %ref.tmp122 = alloca float, align 4
+  %ref.tmp123 = alloca float, align 4
+  %ref.tmp125 = alloca %class.btTransform, align 4
+  %ref.tmp131 = alloca %class.btVector3, align 4
+  %ref.tmp132 = alloca %class.btVector3, align 4
+  %ref.tmp133 = alloca float, align 4
+  %ref.tmp134 = alloca float, align 4
+  %ref.tmp135 = alloca float, align 4
+  %ref.tmp137 = alloca %class.btTransform, align 4
+  %ref.tmp143 = alloca %class.btVector3, align 4
+  %ref.tmp144 = alloca %class.btVector3, align 4
+  %ref.tmp145 = alloca float, align 4
+  %ref.tmp146 = alloca float, align 4
+  %ref.tmp147 = alloca float, align 4
+  %ref.tmp149 = alloca %class.btTransform, align 4
+  %ref.tmp155 = alloca %class.btVector3, align 4
+  %ref.tmp156 = alloca %class.btVector3, align 4
+  %ref.tmp157 = alloca float, align 4
+  %ref.tmp158 = alloca float, align 4
+  %ref.tmp159 = alloca float, align 4
+  %ref.tmp161 = alloca %class.btTransform, align 4
+  %ref.tmp167 = alloca %class.btVector3, align 4
+  %ref.tmp168 = alloca %class.btVector3, align 4
+  %ref.tmp169 = alloca float, align 4
+  %ref.tmp170 = alloca float, align 4
+  %ref.tmp171 = alloca float, align 4
+  %ref.tmp173 = alloca %class.btTransform, align 4
+  %ref.tmp179 = alloca %class.btVector3, align 4
+  %ref.tmp180 = alloca %class.btVector3, align 4
+  %ref.tmp181 = alloca float, align 4
+  %ref.tmp182 = alloca float, align 4
+  %ref.tmp183 = alloca float, align 4
+  %ref.tmp186 = alloca %class.btTransform, align 4
+  %ref.tmp192 = alloca %class.btVector3, align 4
+  %ref.tmp193 = alloca %class.btVector3, align 4
+  %ref.tmp194 = alloca float, align 4
+  %ref.tmp195 = alloca float, align 4
+  %ref.tmp196 = alloca float, align 4
+  %ref.tmp199 = alloca %class.btTransform, align 4
+  %ref.tmp205 = alloca %class.btVector3, align 4
+  %ref.tmp206 = alloca %class.btVector3, align 4
+  %ref.tmp207 = alloca float, align 4
+  %ref.tmp208 = alloca float, align 4
+  %ref.tmp209 = alloca float, align 4
+  %ref.tmp212 = alloca %class.btTransform, align 4
+  %ref.tmp218 = alloca %class.btVector3, align 4
+  %ref.tmp219 = alloca %class.btVector3, align 4
+  %ref.tmp220 = alloca float, align 4
+  %ref.tmp221 = alloca float, align 4
+  %ref.tmp222 = alloca float, align 4
+  %ref.tmp225 = alloca %class.btTransform, align 4
+  %i = alloca i32, align 4
+  %hingeC = alloca %class.btHingeConstraint*, align 4
+  %coneC = alloca %class.btConeTwistConstraint*, align 4
+  %localA = alloca %class.btTransform, align 4
+  %localB = alloca %class.btTransform, align 4
+  %ref.tmp240 = alloca %class.btVector3, align 4
+  %ref.tmp241 = alloca %class.btVector3, align 4
+  %ref.tmp242 = alloca float, align 4
+  %ref.tmp243 = alloca float, align 4
+  %ref.tmp244 = alloca float, align 4
+  %ref.tmp247 = alloca %class.btVector3, align 4
+  %ref.tmp248 = alloca %class.btVector3, align 4
+  %ref.tmp249 = alloca float, align 4
+  %ref.tmp250 = alloca float, align 4
+  %ref.tmp251 = alloca float, align 4
+  %ref.tmp266 = alloca %class.btVector3, align 4
+  %ref.tmp267 = alloca %class.btVector3, align 4
+  %ref.tmp268 = alloca float, align 4
+  %ref.tmp269 = alloca float, align 4
+  %ref.tmp270 = alloca float, align 4
+  %ref.tmp273 = alloca %class.btVector3, align 4
+  %ref.tmp274 = alloca %class.btVector3, align 4
+  %ref.tmp275 = alloca float, align 4
+  %ref.tmp276 = alloca float, align 4
+  %ref.tmp277 = alloca float, align 4
+  %ref.tmp295 = alloca %class.btVector3, align 4
+  %ref.tmp296 = alloca %class.btVector3, align 4
+  %ref.tmp297 = alloca float, align 4
+  %ref.tmp298 = alloca float, align 4
+  %ref.tmp299 = alloca float, align 4
+  %ref.tmp302 = alloca %class.btVector3, align 4
+  %ref.tmp303 = alloca %class.btVector3, align 4
+  %ref.tmp304 = alloca float, align 4
+  %ref.tmp305 = alloca float, align 4
+  %ref.tmp306 = alloca float, align 4
+  %ref.tmp324 = alloca %class.btVector3, align 4
+  %ref.tmp325 = alloca %class.btVector3, align 4
+  %ref.tmp326 = alloca float, align 4
+  %ref.tmp327 = alloca float, align 4
+  %ref.tmp328 = alloca float, align 4
+  %ref.tmp331 = alloca %class.btVector3, align 4
+  %ref.tmp332 = alloca %class.btVector3, align 4
+  %ref.tmp333 = alloca float, align 4
+  %ref.tmp334 = alloca float, align 4
+  %ref.tmp335 = alloca float, align 4
+  %ref.tmp353 = alloca %class.btVector3, align 4
+  %ref.tmp354 = alloca %class.btVector3, align 4
+  %ref.tmp355 = alloca float, align 4
+  %ref.tmp356 = alloca float, align 4
+  %ref.tmp357 = alloca float, align 4
+  %ref.tmp360 = alloca %class.btVector3, align 4
+  %ref.tmp361 = alloca %class.btVector3, align 4
+  %ref.tmp362 = alloca float, align 4
+  %ref.tmp363 = alloca float, align 4
+  %ref.tmp364 = alloca float, align 4
+  %ref.tmp382 = alloca %class.btVector3, align 4
+  %ref.tmp383 = alloca %class.btVector3, align 4
+  %ref.tmp384 = alloca float, align 4
+  %ref.tmp385 = alloca float, align 4
+  %ref.tmp386 = alloca float, align 4
+  %ref.tmp389 = alloca %class.btVector3, align 4
+  %ref.tmp390 = alloca %class.btVector3, align 4
+  %ref.tmp391 = alloca float, align 4
+  %ref.tmp392 = alloca float, align 4
+  %ref.tmp393 = alloca float, align 4
+  %ref.tmp411 = alloca %class.btVector3, align 4
+  %ref.tmp412 = alloca %class.btVector3, align 4
+  %ref.tmp413 = alloca float, align 4
+  %ref.tmp414 = alloca float, align 4
+  %ref.tmp415 = alloca float, align 4
+  %ref.tmp418 = alloca %class.btVector3, align 4
+  %ref.tmp419 = alloca %class.btVector3, align 4
+  %ref.tmp420 = alloca float, align 4
+  %ref.tmp421 = alloca float, align 4
+  %ref.tmp422 = alloca float, align 4
+  %ref.tmp440 = alloca %class.btVector3, align 4
+  %ref.tmp441 = alloca %class.btVector3, align 4
+  %ref.tmp442 = alloca float, align 4
+  %ref.tmp443 = alloca float, align 4
+  %ref.tmp444 = alloca float, align 4
+  %ref.tmp447 = alloca %class.btVector3, align 4
+  %ref.tmp448 = alloca %class.btVector3, align 4
+  %ref.tmp449 = alloca float, align 4
+  %ref.tmp450 = alloca float, align 4
+  %ref.tmp451 = alloca float, align 4
+  %ref.tmp469 = alloca %class.btVector3, align 4
+  %ref.tmp470 = alloca %class.btVector3, align 4
+  %ref.tmp471 = alloca float, align 4
+  %ref.tmp472 = alloca float, align 4
+  %ref.tmp473 = alloca float, align 4
+  %ref.tmp476 = alloca %class.btVector3, align 4
+  %ref.tmp477 = alloca %class.btVector3, align 4
+  %ref.tmp478 = alloca float, align 4
+  %ref.tmp479 = alloca float, align 4
+  %ref.tmp480 = alloca float, align 4
+  %ref.tmp498 = alloca %class.btVector3, align 4
+  %ref.tmp499 = alloca %class.btVector3, align 4
+  %ref.tmp500 = alloca float, align 4
+  %ref.tmp501 = alloca float, align 4
+  %ref.tmp502 = alloca float, align 4
+  %ref.tmp505 = alloca %class.btVector3, align 4
+  %ref.tmp506 = alloca %class.btVector3, align 4
+  %ref.tmp507 = alloca float, align 4
+  %ref.tmp508 = alloca float, align 4
+  %ref.tmp509 = alloca float, align 4
+  store %class.RagDoll* %this, %class.RagDoll** %this.addr, align 4
+  store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
+  store float %scale, float* %scale.addr, align 4
+  %this1 = load %class.RagDoll** %this.addr
+  store %class.RagDoll* %this1, %class.RagDoll** %retval
+  %0 = bitcast %class.RagDoll* %this1 to i8***
+  store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+  %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
+  %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %2 = bitcast i8* %call to %class.btCapsuleShape*
+  %3 = load float* %scale.addr, align 4
+  %mul = fmul float 0x3FC3333340000000, %3
+  %4 = load float* %scale.addr, align 4
+  %mul2 = fmul float 0x3FC99999A0000000, %4
+  %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
+  %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+  store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
+  %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %6 = bitcast i8* %call5 to %class.btCapsuleShape*
+  %7 = load float* %scale.addr, align 4
+  %mul6 = fmul float 0x3FC3333340000000, %7
+  %8 = load float* %scale.addr, align 4
+  %mul7 = fmul float 0x3FD1EB8520000000, %8
+  %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
+          to label %invoke.cont9 unwind label %lpad8
+
+invoke.cont9:                                     ; preds = %invoke.cont
+  %9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
+  %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+  store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
+  %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %10 = bitcast i8* %call14 to %class.btCapsuleShape*
+  %11 = load float* %scale.addr, align 4
+  %mul15 = fmul float 0x3FB99999A0000000, %11
+  %12 = load float* %scale.addr, align 4
+  %mul16 = fmul float 0x3FA99999A0000000, %12
+  %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
+          to label %invoke.cont18 unwind label %lpad17
+
+invoke.cont18:                                    ; preds = %invoke.cont9
+  %13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
+  %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+  store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
+  %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %14 = bitcast i8* %call23 to %class.btCapsuleShape*
+  %15 = load float* %scale.addr, align 4
+  %mul24 = fmul float 0x3FB1EB8520000000, %15
+  %16 = load float* %scale.addr, align 4
+  %mul25 = fmul float 0x3FDCCCCCC0000000, %16
+  %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
+          to label %invoke.cont27 unwind label %lpad26
+
+invoke.cont27:                                    ; preds = %invoke.cont18
+  %17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
+  %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+  store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
+  %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %18 = bitcast i8* %call32 to %class.btCapsuleShape*
+  %19 = load float* %scale.addr, align 4
+  %mul33 = fmul float 0x3FA99999A0000000, %19
+  %20 = load float* %scale.addr, align 4
+  %mul34 = fmul float 0x3FD7AE1480000000, %20
+  %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont27
+  %21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
+  %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+  store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
+  %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %22 = bitcast i8* %call41 to %class.btCapsuleShape*
+  %23 = load float* %scale.addr, align 4
+  %mul42 = fmul float 0x3FB1EB8520000000, %23
+  %24 = load float* %scale.addr, align 4
+  %mul43 = fmul float 0x3FDCCCCCC0000000, %24
+  %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
+          to label %invoke.cont45 unwind label %lpad44
+
+invoke.cont45:                                    ; preds = %invoke.cont36
+  %25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
+  %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+  store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
+  %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %26 = bitcast i8* %call50 to %class.btCapsuleShape*
+  %27 = load float* %scale.addr, align 4
+  %mul51 = fmul float 0x3FA99999A0000000, %27
+  %28 = load float* %scale.addr, align 4
+  %mul52 = fmul float 0x3FD7AE1480000000, %28
+  %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
+          to label %invoke.cont54 unwind label %lpad53
+
+invoke.cont54:                                    ; preds = %invoke.cont45
+  %29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
+  %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+  store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
+  %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %30 = bitcast i8* %call59 to %class.btCapsuleShape*
+  %31 = load float* %scale.addr, align 4
+  %mul60 = fmul float 0x3FA99999A0000000, %31
+  %32 = load float* %scale.addr, align 4
+  %mul61 = fmul float 0x3FD51EB860000000, %32
+  %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
+          to label %invoke.cont63 unwind label %lpad62
+
+invoke.cont63:                                    ; preds = %invoke.cont54
+  %33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
+  %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+  store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
+  %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %34 = bitcast i8* %call68 to %class.btCapsuleShape*
+  %35 = load float* %scale.addr, align 4
+  %mul69 = fmul float 0x3FA47AE140000000, %35
+  %36 = load float* %scale.addr, align 4
+  %mul70 = fmul float 2.500000e-01, %36
+  %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
+          to label %invoke.cont72 unwind label %lpad71
+
+invoke.cont72:                                    ; preds = %invoke.cont63
+  %37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
+  %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+  store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
+  %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %38 = bitcast i8* %call77 to %class.btCapsuleShape*
+  %39 = load float* %scale.addr, align 4
+  %mul78 = fmul float 0x3FA99999A0000000, %39
+  %40 = load float* %scale.addr, align 4
+  %mul79 = fmul float 0x3FD51EB860000000, %40
+  %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
+          to label %invoke.cont81 unwind label %lpad80
+
+invoke.cont81:                                    ; preds = %invoke.cont72
+  %41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
+  %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+  store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
+  %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %42 = bitcast i8* %call86 to %class.btCapsuleShape*
+  %43 = load float* %scale.addr, align 4
+  %mul87 = fmul float 0x3FA47AE140000000, %43
+  %44 = load float* %scale.addr, align 4
+  %mul88 = fmul float 2.500000e-01, %44
+  %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
+          to label %invoke.cont90 unwind label %lpad89
+
+invoke.cont90:                                    ; preds = %invoke.cont81
+  %45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
+  %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+  store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
+  %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
+  %46 = load %class.btVector3** %positionOffset.addr, align 4
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
+  %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp98, align 4
+  store float 1.000000e+00, float* %ref.tmp99, align 4
+  store float 0.000000e+00, float* %ref.tmp100, align 4
+  %call101 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp97, float* %ref.tmp98, float* %ref.tmp99, float* %ref.tmp100)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+  %47 = load %class.btCollisionShape** %arrayidx104, align 4
+  %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
+  %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+  store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp109, align 4
+  store float 0x3FF3333340000000, float* %ref.tmp110, align 4
+  store float 0.000000e+00, float* %ref.tmp111, align 4
+  %call112 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp108, float* %ref.tmp109, float* %ref.tmp110, float* %ref.tmp111)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+  %48 = load %class.btCollisionShape** %arrayidx115, align 4
+  %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
+  %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+  store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp121, align 4
+  store float 0x3FF99999A0000000, float* %ref.tmp122, align 4
+  store float 0.000000e+00, float* %ref.tmp123, align 4
+  %call124 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp120, float* %ref.tmp121, float* %ref.tmp122, float* %ref.tmp123)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+  %49 = load %class.btCollisionShape** %arrayidx127, align 4
+  %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
+  %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+  store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp134, align 4
+  store float 0.000000e+00, float* %ref.tmp135, align 4
+  %call136 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp132, float* %ref.tmp133, float* %ref.tmp134, float* %ref.tmp135)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+  %50 = load %class.btCollisionShape** %arrayidx139, align 4
+  %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
+  %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+  store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp146, align 4
+  store float 0.000000e+00, float* %ref.tmp147, align 4
+  %call148 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp144, float* %ref.tmp145, float* %ref.tmp146, float* %ref.tmp147)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+  %51 = load %class.btCollisionShape** %arrayidx151, align 4
+  %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
+  %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+  store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp158, align 4
+  store float 0.000000e+00, float* %ref.tmp159, align 4
+  %call160 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp156, float* %ref.tmp157, float* %ref.tmp158, float* %ref.tmp159)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+  %52 = load %class.btCollisionShape** %arrayidx163, align 4
+  %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
+  %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+  store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp170, align 4
+  store float 0.000000e+00, float* %ref.tmp171, align 4
+  %call172 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp168, float* %ref.tmp169, float* %ref.tmp170, float* %ref.tmp171)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+  %53 = load %class.btCollisionShape** %arrayidx175, align 4
+  %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
+  %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+  store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFD6666660000000, float* %ref.tmp181, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp182, align 4
+  store float 0.000000e+00, float* %ref.tmp183, align 4
+  %call184 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp180, float* %ref.tmp181, float* %ref.tmp182, float* %ref.tmp183)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp179, float* %scale.addr, %class.btVector3* %ref.tmp180)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp179)
+  %call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+  %54 = load %class.btCollisionShape** %arrayidx188, align 4
+  %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
+  %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+  store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFE6666660000000, float* %ref.tmp194, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp195, align 4
+  store float 0.000000e+00, float* %ref.tmp196, align 4
+  %call197 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp193, float* %ref.tmp194, float* %ref.tmp195, float* %ref.tmp196)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp192, float* %scale.addr, %class.btVector3* %ref.tmp193)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp192)
+  %call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+  %55 = load %class.btCollisionShape** %arrayidx201, align 4
+  %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
+  %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+  store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FD6666660000000, float* %ref.tmp207, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp208, align 4
+  store float 0.000000e+00, float* %ref.tmp209, align 4
+  %call210 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp206, float* %ref.tmp207, float* %ref.tmp208, float* %ref.tmp209)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp205, float* %scale.addr, %class.btVector3* %ref.tmp206)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp205)
+  %call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+  %56 = load %class.btCollisionShape** %arrayidx214, align 4
+  %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
+  %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+  store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FE6666660000000, float* %ref.tmp220, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp221, align 4
+  store float 0.000000e+00, float* %ref.tmp222, align 4
+  %call223 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp219, float* %ref.tmp220, float* %ref.tmp221, float* %ref.tmp222)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp218, float* %scale.addr, %class.btVector3* %ref.tmp219)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp218)
+  %call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+  %57 = load %class.btCollisionShape** %arrayidx227, align 4
+  %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
+  %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+  store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %invoke.cont90
+  %58 = load i32* %i, align 4
+  %cmp = icmp slt i32 %58, 11
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %59 = load i32* %i, align 4
+  %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+  %60 = load %class.btRigidBody** %arrayidx232, align 4
+  call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
+  %61 = load i32* %i, align 4
+  %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+  %62 = load %class.btRigidBody** %arrayidx234, align 4
+  %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
+  call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
+  %64 = load i32* %i, align 4
+  %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+  %65 = load %class.btRigidBody** %arrayidx236, align 4
+  call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %66 = load i32* %i, align 4
+  %inc = add nsw i32 %66, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+lpad:                                             ; preds = %entry
+  %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %68 = extractvalue { i8*, i32 } %67, 0
+  store i8* %68, i8** %exn.slot
+  %69 = extractvalue { i8*, i32 } %67, 1
+  store i32 %69, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call)
+          to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:                                     ; preds = %lpad
+  br label %eh.resume
+
+lpad8:                                            ; preds = %invoke.cont
+  %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %71 = extractvalue { i8*, i32 } %70, 0
+  store i8* %71, i8** %exn.slot
+  %72 = extractvalue { i8*, i32 } %70, 1
+  store i32 %72, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call5)
+          to label %invoke.cont11 unwind label %terminate.lpad
+
+invoke.cont11:                                    ; preds = %lpad8
+  br label %eh.resume
+
+lpad17:                                           ; preds = %invoke.cont9
+  %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %74 = extractvalue { i8*, i32 } %73, 0
+  store i8* %74, i8** %exn.slot
+  %75 = extractvalue { i8*, i32 } %73, 1
+  store i32 %75, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call14)
+          to label %invoke.cont20 unwind label %terminate.lpad
+
+invoke.cont20:                                    ; preds = %lpad17
+  br label %eh.resume
+
+lpad26:                                           ; preds = %invoke.cont18
+  %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %77 = extractvalue { i8*, i32 } %76, 0
+  store i8* %77, i8** %exn.slot
+  %78 = extractvalue { i8*, i32 } %76, 1
+  store i32 %78, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call23)
+          to label %invoke.cont29 unwind label %terminate.lpad
+
+invoke.cont29:                                    ; preds = %lpad26
+  br label %eh.resume
+
+lpad35:                                           ; preds = %invoke.cont27
+  %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %80 = extractvalue { i8*, i32 } %79, 0
+  store i8* %80, i8** %exn.slot
+  %81 = extractvalue { i8*, i32 } %79, 1
+  store i32 %81, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call32)
+          to label %invoke.cont38 unwind label %terminate.lpad
+
+invoke.cont38:                                    ; preds = %lpad35
+  br label %eh.resume
+
+lpad44:                                           ; preds = %invoke.cont36
+  %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %83 = extractvalue { i8*, i32 } %82, 0
+  store i8* %83, i8** %exn.slot
+  %84 = extractvalue { i8*, i32 } %82, 1
+  store i32 %84, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call41)
+          to label %invoke.cont47 unwind label %terminate.lpad
+
+invoke.cont47:                                    ; preds = %lpad44
+  br label %eh.resume
+
+lpad53:                                           ; preds = %invoke.cont45
+  %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %86 = extractvalue { i8*, i32 } %85, 0
+  store i8* %86, i8** %exn.slot
+  %87 = extractvalue { i8*, i32 } %85, 1
+  store i32 %87, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call50)
+          to label %invoke.cont56 unwind label %terminate.lpad
+
+invoke.cont56:                                    ; preds = %lpad53
+  br label %eh.resume
+
+lpad62:                                           ; preds = %invoke.cont54
+  %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %89 = extractvalue { i8*, i32 } %88, 0
+  store i8* %89, i8** %exn.slot
+  %90 = extractvalue { i8*, i32 } %88, 1
+  store i32 %90, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call59)
+          to label %invoke.cont65 unwind label %terminate.lpad
+
+invoke.cont65:                                    ; preds = %lpad62
+  br label %eh.resume
+
+lpad71:                                           ; preds = %invoke.cont63
+  %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %92 = extractvalue { i8*, i32 } %91, 0
+  store i8* %92, i8** %exn.slot
+  %93 = extractvalue { i8*, i32 } %91, 1
+  store i32 %93, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call68)
+          to label %invoke.cont74 unwind label %terminate.lpad
+
+invoke.cont74:                                    ; preds = %lpad71
+  br label %eh.resume
+
+lpad80:                                           ; preds = %invoke.cont72
+  %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %95 = extractvalue { i8*, i32 } %94, 0
+  store i8* %95, i8** %exn.slot
+  %96 = extractvalue { i8*, i32 } %94, 1
+  store i32 %96, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call77)
+          to label %invoke.cont83 unwind label %terminate.lpad
+
+invoke.cont83:                                    ; preds = %lpad80
+  br label %eh.resume
+
+lpad89:                                           ; preds = %invoke.cont81
+  %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %98 = extractvalue { i8*, i32 } %97, 0
+  store i8* %98, i8** %exn.slot
+  %99 = extractvalue { i8*, i32 } %97, 1
+  store i32 %99, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call86)
+          to label %invoke.cont92 unwind label %terminate.lpad
+
+invoke.cont92:                                    ; preds = %lpad89
+  br label %eh.resume
+
+for.end:                                          ; preds = %for.cond
+  %call237 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localA)
+  %call238 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localB)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call239 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call239, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp242, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp243, align 4
+  store float 0.000000e+00, float* %ref.tmp244, align 4
+  %call245 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp241, float* %ref.tmp242, float* %ref.tmp243, float* %ref.tmp244)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp240, float* %scale.addr, %class.btVector3* %ref.tmp241)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp240)
+  %call246 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call246, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp249, align 4
+  store float 0xBFC3333340000000, float* %ref.tmp250, align 4
+  store float 0.000000e+00, float* %ref.tmp251, align 4
+  %call252 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp248, float* %ref.tmp249, float* %ref.tmp250, float* %ref.tmp251)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp247, float* %scale.addr, %class.btVector3* %ref.tmp248)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
+  %call253 = call noalias i8* @_Znwm(i32 780)
+  %100 = bitcast i8* %call253 to %class.btHingeConstraint*
+  %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+  %101 = load %class.btRigidBody** %arrayidx255, align 4
+  %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+  %102 = load %class.btRigidBody** %arrayidx257, align 4
+  %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %for.end
+  store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
+  %103 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %104 = load %class.btHingeConstraint** %hingeC, align 4
+  %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
+  %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+  store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
+  %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+  %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+  %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+  %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+  %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+  call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call265 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call265, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp268, align 4
+  store float 0x3FD3333340000000, float* %ref.tmp269, align 4
+  store float 0.000000e+00, float* %ref.tmp270, align 4
+  %call271 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp267, float* %ref.tmp268, float* %ref.tmp269, float* %ref.tmp270)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp266, float* %scale.addr, %class.btVector3* %ref.tmp267)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp266)
+  %call272 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call272, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp275, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp276, align 4
+  store float 0.000000e+00, float* %ref.tmp277, align 4
+  %call278 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp274, float* %ref.tmp275, float* %ref.tmp276, float* %ref.tmp277)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp273, float* %scale.addr, %class.btVector3* %ref.tmp274)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
+  %call279 = call noalias i8* @_Znwm(i32 628)
+  %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
+  %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+  %111 = load %class.btRigidBody** %arrayidx281, align 4
+  %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+  %112 = load %class.btRigidBody** %arrayidx283, align 4
+  %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont285 unwind label %lpad284
+
+invoke.cont285:                                   ; preds = %invoke.cont259
+  store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
+  %113 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %114 = load %class.btConeTwistConstraint** %coneC, align 4
+  %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
+  %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+  store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
+  %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+  %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+  %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+  %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+  %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+  call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call294 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call294, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0xBFC70A3D80000000, float* %ref.tmp297, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp298, align 4
+  store float 0.000000e+00, float* %ref.tmp299, align 4
+  %call300 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp296, float* %ref.tmp297, float* %ref.tmp298, float* %ref.tmp299)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp295, float* %scale.addr, %class.btVector3* %ref.tmp296)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp295)
+  %call301 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call301, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0.000000e+00, float* %ref.tmp304, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp305, align 4
+  store float 0.000000e+00, float* %ref.tmp306, align 4
+  %call307 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp303, float* %ref.tmp304, float* %ref.tmp305, float* %ref.tmp306)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp302, float* %scale.addr, %class.btVector3* %ref.tmp303)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
+  %call308 = call noalias i8* @_Znwm(i32 628)
+  %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
+  %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+  %121 = load %class.btRigidBody** %arrayidx310, align 4
+  %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+  %122 = load %class.btRigidBody** %arrayidx312, align 4
+  %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont314 unwind label %lpad313
+
+invoke.cont314:                                   ; preds = %invoke.cont285
+  store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
+  %123 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %124 = load %class.btConeTwistConstraint** %coneC, align 4
+  %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
+  %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+  store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
+  %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+  %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+  %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+  %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+  %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+  call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call323 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call323, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp326, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp327, align 4
+  store float 0.000000e+00, float* %ref.tmp328, align 4
+  %call329 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp325, float* %ref.tmp326, float* %ref.tmp327, float* %ref.tmp328)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp324, float* %scale.addr, %class.btVector3* %ref.tmp325)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp324)
+  %call330 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call330, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp333, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp334, align 4
+  store float 0.000000e+00, float* %ref.tmp335, align 4
+  %call336 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp332, float* %ref.tmp333, float* %ref.tmp334, float* %ref.tmp335)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp331, float* %scale.addr, %class.btVector3* %ref.tmp332)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
+  %call337 = call noalias i8* @_Znwm(i32 780)
+  %130 = bitcast i8* %call337 to %class.btHingeConstraint*
+  %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+  %131 = load %class.btRigidBody** %arrayidx339, align 4
+  %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+  %132 = load %class.btRigidBody** %arrayidx341, align 4
+  %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont343 unwind label %lpad342
+
+invoke.cont343:                                   ; preds = %invoke.cont314
+  store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
+  %133 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %134 = load %class.btHingeConstraint** %hingeC, align 4
+  %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
+  %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+  store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
+  %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+  %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+  %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+  %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+  %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+  call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call352 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call352, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0x3FC70A3D80000000, float* %ref.tmp355, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp356, align 4
+  store float 0.000000e+00, float* %ref.tmp357, align 4
+  %call358 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp354, float* %ref.tmp355, float* %ref.tmp356, float* %ref.tmp357)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp353, float* %scale.addr, %class.btVector3* %ref.tmp354)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp353)
+  %call359 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call359, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp362, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp363, align 4
+  store float 0.000000e+00, float* %ref.tmp364, align 4
+  %call365 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp361, float* %ref.tmp362, float* %ref.tmp363, float* %ref.tmp364)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp360, float* %scale.addr, %class.btVector3* %ref.tmp361)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
+  %call366 = call noalias i8* @_Znwm(i32 628)
+  %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
+  %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+  %141 = load %class.btRigidBody** %arrayidx368, align 4
+  %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+  %142 = load %class.btRigidBody** %arrayidx370, align 4
+  %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont372 unwind label %lpad371
+
+invoke.cont372:                                   ; preds = %invoke.cont343
+  store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
+  %143 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %144 = load %class.btConeTwistConstraint** %coneC, align 4
+  %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
+  %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+  store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
+  %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+  %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+  %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+  %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+  %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+  call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call381 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call381, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp384, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp385, align 4
+  store float 0.000000e+00, float* %ref.tmp386, align 4
+  %call387 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp383, float* %ref.tmp384, float* %ref.tmp385, float* %ref.tmp386)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp382, float* %scale.addr, %class.btVector3* %ref.tmp383)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp382)
+  %call388 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call388, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp391, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp392, align 4
+  store float 0.000000e+00, float* %ref.tmp393, align 4
+  %call394 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp390, float* %ref.tmp391, float* %ref.tmp392, float* %ref.tmp393)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp389, float* %scale.addr, %class.btVector3* %ref.tmp390)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
+  %call395 = call noalias i8* @_Znwm(i32 780)
+  %150 = bitcast i8* %call395 to %class.btHingeConstraint*
+  %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+  %151 = load %class.btRigidBody** %arrayidx397, align 4
+  %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+  %152 = load %class.btRigidBody** %arrayidx399, align 4
+  %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont401 unwind label %lpad400
+
+invoke.cont401:                                   ; preds = %invoke.cont372
+  store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
+  %153 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %154 = load %class.btHingeConstraint** %hingeC, align 4
+  %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
+  %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+  store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
+  %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+  %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+  %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+  %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+  %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+  call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call410 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call410, float 0.000000e+00, float 0.000000e+00, float 0x400921FB60000000)
+  store float 0xBFC99999A0000000, float* %ref.tmp413, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp414, align 4
+  store float 0.000000e+00, float* %ref.tmp415, align 4
+  %call416 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp412, float* %ref.tmp413, float* %ref.tmp414, float* %ref.tmp415)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp411, float* %scale.addr, %class.btVector3* %ref.tmp412)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp411)
+  %call417 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call417, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp420, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp421, align 4
+  store float 0.000000e+00, float* %ref.tmp422, align 4
+  %call423 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp419, float* %ref.tmp420, float* %ref.tmp421, float* %ref.tmp422)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp418, float* %scale.addr, %class.btVector3* %ref.tmp419)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
+  %call424 = call noalias i8* @_Znwm(i32 628)
+  %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
+  %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+  %161 = load %class.btRigidBody** %arrayidx426, align 4
+  %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+  %162 = load %class.btRigidBody** %arrayidx428, align 4
+  %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont430 unwind label %lpad429
+
+invoke.cont430:                                   ; preds = %invoke.cont401
+  store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
+  %163 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %164 = load %class.btConeTwistConstraint** %coneC, align 4
+  %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
+  %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+  store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
+  %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+  %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+  %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+  %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+  %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+  call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call439 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call439, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp442, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp443, align 4
+  store float 0.000000e+00, float* %ref.tmp444, align 4
+  %call445 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp441, float* %ref.tmp442, float* %ref.tmp443, float* %ref.tmp444)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp440, float* %scale.addr, %class.btVector3* %ref.tmp441)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp440)
+  %call446 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call446, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp449, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp450, align 4
+  store float 0.000000e+00, float* %ref.tmp451, align 4
+  %call452 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp448, float* %ref.tmp449, float* %ref.tmp450, float* %ref.tmp451)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp447, float* %scale.addr, %class.btVector3* %ref.tmp448)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
+  %call453 = call noalias i8* @_Znwm(i32 780)
+  %170 = bitcast i8* %call453 to %class.btHingeConstraint*
+  %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+  %171 = load %class.btRigidBody** %arrayidx455, align 4
+  %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+  %172 = load %class.btRigidBody** %arrayidx457, align 4
+  %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont459 unwind label %lpad458
+
+invoke.cont459:                                   ; preds = %invoke.cont430
+  store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
+  %173 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %174 = load %class.btHingeConstraint** %hingeC, align 4
+  %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
+  %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+  store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
+  %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+  %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+  %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+  %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+  %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+  call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call468 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call468, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
+  store float 0x3FC99999A0000000, float* %ref.tmp471, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp472, align 4
+  store float 0.000000e+00, float* %ref.tmp473, align 4
+  %call474 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp470, float* %ref.tmp471, float* %ref.tmp472, float* %ref.tmp473)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp469, float* %scale.addr, %class.btVector3* %ref.tmp470)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp469)
+  %call475 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call475, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp478, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp479, align 4
+  store float 0.000000e+00, float* %ref.tmp480, align 4
+  %call481 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp477, float* %ref.tmp478, float* %ref.tmp479, float* %ref.tmp480)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp476, float* %scale.addr, %class.btVector3* %ref.tmp477)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
+  %call482 = call noalias i8* @_Znwm(i32 628)
+  %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
+  %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+  %181 = load %class.btRigidBody** %arrayidx484, align 4
+  %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+  %182 = load %class.btRigidBody** %arrayidx486, align 4
+  %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont488 unwind label %lpad487
+
+invoke.cont488:                                   ; preds = %invoke.cont459
+  store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
+  %183 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %184 = load %class.btConeTwistConstraint** %coneC, align 4
+  %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
+  %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+  store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
+  %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+  %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+  %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+  %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+  %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+  call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call497 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call497, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp500, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp501, align 4
+  store float 0.000000e+00, float* %ref.tmp502, align 4
+  %call503 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp499, float* %ref.tmp500, float* %ref.tmp501, float* %ref.tmp502)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp498, float* %scale.addr, %class.btVector3* %ref.tmp499)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp498)
+  %call504 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call504, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp507, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp508, align 4
+  store float 0.000000e+00, float* %ref.tmp509, align 4
+  %call510 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp506, float* %ref.tmp507, float* %ref.tmp508, float* %ref.tmp509)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp505, float* %scale.addr, %class.btVector3* %ref.tmp506)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
+  %call511 = call noalias i8* @_Znwm(i32 780)
+  %190 = bitcast i8* %call511 to %class.btHingeConstraint*
+  %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+  %191 = load %class.btRigidBody** %arrayidx513, align 4
+  %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+  %192 = load %class.btRigidBody** %arrayidx515, align 4
+  %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont517 unwind label %lpad516
+
+invoke.cont517:                                   ; preds = %invoke.cont488
+  store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
+  %193 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %194 = load %class.btHingeConstraint** %hingeC, align 4
+  %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
+  %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+  store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
+  %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+  %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+  %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+  %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+  %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+  call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
+  %200 = load %class.RagDoll** %retval
+  ret %class.RagDoll* %200
+
+lpad258:                                          ; preds = %for.end
+  %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %202 = extractvalue { i8*, i32 } %201, 0
+  store i8* %202, i8** %exn.slot
+  %203 = extractvalue { i8*, i32 } %201, 1
+  store i32 %203, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call253) nounwind
+  br label %eh.resume
+
+lpad284:                                          ; preds = %invoke.cont259
+  %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %205 = extractvalue { i8*, i32 } %204, 0
+  store i8* %205, i8** %exn.slot
+  %206 = extractvalue { i8*, i32 } %204, 1
+  store i32 %206, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call279) nounwind
+  br label %eh.resume
+
+lpad313:                                          ; preds = %invoke.cont285
+  %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %208 = extractvalue { i8*, i32 } %207, 0
+  store i8* %208, i8** %exn.slot
+  %209 = extractvalue { i8*, i32 } %207, 1
+  store i32 %209, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call308) nounwind
+  br label %eh.resume
+
+lpad342:                                          ; preds = %invoke.cont314
+  %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %211 = extractvalue { i8*, i32 } %210, 0
+  store i8* %211, i8** %exn.slot
+  %212 = extractvalue { i8*, i32 } %210, 1
+  store i32 %212, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call337) nounwind
+  br label %eh.resume
+
+lpad371:                                          ; preds = %invoke.cont343
+  %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %214 = extractvalue { i8*, i32 } %213, 0
+  store i8* %214, i8** %exn.slot
+  %215 = extractvalue { i8*, i32 } %213, 1
+  store i32 %215, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call366) nounwind
+  br label %eh.resume
+
+lpad400:                                          ; preds = %invoke.cont372
+  %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %217 = extractvalue { i8*, i32 } %216, 0
+  store i8* %217, i8** %exn.slot
+  %218 = extractvalue { i8*, i32 } %216, 1
+  store i32 %218, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call395) nounwind
+  br label %eh.resume
+
+lpad429:                                          ; preds = %invoke.cont401
+  %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %220 = extractvalue { i8*, i32 } %219, 0
+  store i8* %220, i8** %exn.slot
+  %221 = extractvalue { i8*, i32 } %219, 1
+  store i32 %221, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call424) nounwind
+  br label %eh.resume
+
+lpad458:                                          ; preds = %invoke.cont430
+  %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %223 = extractvalue { i8*, i32 } %222, 0
+  store i8* %223, i8** %exn.slot
+  %224 = extractvalue { i8*, i32 } %222, 1
+  store i32 %224, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call453) nounwind
+  br label %eh.resume
+
+lpad487:                                          ; preds = %invoke.cont459
+  %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %226 = extractvalue { i8*, i32 } %225, 0
+  store i8* %226, i8** %exn.slot
+  %227 = extractvalue { i8*, i32 } %225, 1
+  store i32 %227, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call482) nounwind
+  br label %eh.resume
+
+lpad516:                                          ; preds = %invoke.cont488
+  %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %229 = extractvalue { i8*, i32 } %228, 0
+  store i8* %229, i8** %exn.slot
+  %230 = extractvalue { i8*, i32 } %228, 1
+  store i32 %230, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call511) nounwind
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
+  %exn = load i8** %exn.slot
+  %sel = load i32* %ehselector.slot
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+  %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+  resume { i8*, i32 } %lpad.val526
+
+terminate.lpad:                                   ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad
+  %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_ZmlRKfRK9btVector3(%class.btVector3* noalias sret, float*, %class.btVector3*) inlinehint ssp
+
+declare %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll*, float, %class.btTransform*, %class.btCollisionShape*) ssp align 2
+
+declare void @_ZNK11btTransformmlERKS_(%class.btTransform* noalias sret, %class.btTransform*, %class.btTransform*) inlinehint ssp align 2
+
+declare void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3*, float, float, float) ssp align 2
+
+declare void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody*, float, float)
+
+declare void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject*, float) nounwind ssp align 2
+
+declare void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody*, float, float) nounwind ssp align 2
+
+declare %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*, i1 zeroext)
+
+declare void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint*, float, float, float, float, float) ssp align 2
+
+declare %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*)
+
+declare void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint*, float, float, float, float, float, float) nounwind ssp align 2
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index d8b51ec..cb4d080 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
@@ -47,3 +47,32 @@ bb2:                                              ; preds = %bb
   tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
   ret i32 0
 }
+
+; PR12389
+; Make sure the DPair register class can spill.
+define void @pr12389(i8* %p) nounwind ssp {
+entry:
+  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %p, i32 1)
+  tail call void asm sideeffect "", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %p, <4 x float> %vld1, i32 1)
+  ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+; <rdar://problem/11101911>
+; When an strd is expanded into two str instructions, make sure the first str
+; doesn't kill the base register. This can happen if the base register is the
+; same as the data register.
+%class = type { i8*, %class*, i32 }
+define void @f11101911(%class* %this, i32 %num) ssp align 2 {
+entry:
+  %p1 = getelementptr inbounds %class* %this, i32 0, i32 1
+  %p2 = getelementptr inbounds %class* %this, i32 0, i32 2
+  tail call void asm sideeffect "", "~{r1},~{r3},~{r5},~{r11},~{r13}"() nounwind
+  store %class* %this, %class** %p1, align 4
+  store i32 %num, i32* %p2, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb2/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
new file mode 100644
index 0000000..aef6f85
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; This test case would clobber the outgoing call arguments by writing to the
+; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+
+; CHECK: main
+; CHECK: vmov.f64
+; Adjust SP for the large call
+; CHECK: sub sp,
+; CHECK: mov [[FR:r[0-9]+]], sp
+; Store to call frame + #4
+; CHECK: str{{.*\[}}[[FR]], #4]
+; Don't clobber that store until the call.
+; CHECK-NOT: [sp, #4]
+; CHECK: variadic
+
+define i32 @main() ssp {
+entry:
+  %d = alloca double, align 8
+  store double 1.000000e+00, double* %d, align 8
+  %0 = load double* %d, align 8
+  call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+  ret i32 0
+}
+
+declare void @variadic(i8*, i8*, i8*, ...)
+
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 4597ba5..36544d1 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
 ; rdar://7352504
 ; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
 
@@ -46,10 +46,10 @@ bb119:                                            ; preds = %bb20, %bb20
 
 bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: bb420
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
   store %union.rec* %x, %union.rec** @zz_hold, align 4
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9ff114e..9aaa821 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
 
 ; This now reduces to a single induction variable.
 
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
 @G = external global i32                          ; <i32*> [#uses=2]
 @array = external global i32*                     ; <i32**> [#uses=1]
 
@@ -20,9 +15,9 @@ entry:
 
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 46937fc..8285742 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -51,12 +51,11 @@ return:                                           ; preds = %bb, %entry
 define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: mov.w [[R3:r[0-9]+]], #1065353216
-; CHECK: vdup.32 q{{.*}}, [[R3]]
+; CHECK: vmov.f32 q{{.*}}, #1.000000e+00
   br i1 undef, label %bb1, label %bb2
 
 bb1:
-; CHECK-NEXT: %bb1
+; CHECK: %bb1
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tmp1 = shl i32 %indvar, 2
   %gep1 = getelementptr i8* %ptr1, i32 %tmp1
@@ -95,8 +94,8 @@ bb.nph:
 
 bb:                                               ; preds = %bb, %bb.nph
 ; CHECK: bb
-; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
 ; CHECK: eor.w
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
 ; CHECK-NOT: eor
 ; CHECK: and
   %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 0992fa8..893bd0f 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
 ; rdar://7354379
 
-declare double @floor(double) nounwind readnone
+declare double @foo(double) nounwind readnone
 
 define void @t(i32 %c, double %b) {
 entry:
@@ -24,9 +24,8 @@ bb7:                                              ; preds = %bb3
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp	r0, #0
-; CHECK:      cmp	r0, #0
 ; CHECK-NEXT:      cbnz
-  %0 = tail call  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+  %0 = tail call  double @foo(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
 
 bb11:                                             ; preds = %bb9, %bb7
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 00a54a0..f7e9665 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -3,8 +3,8 @@
 define i32 @f1(i32 %a) {
 ; CHECK: f1:
 ; CHECK: clz r
-    %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
+    %tmp = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
     ret i32 %tmp
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 2c57348..f577f79 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 4f2b7c1..b2328e7 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index d3b781d..2e83ea1 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;CHECK: ldrd r2, r3, [r2]
+; CHECK: ldrd
+; CHECK: umull
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 24c45c5..58f9add 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -15,5 +15,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     ret i32 %tmp2
 }
 ; CHECK: f2:
-; CHECK: 	muls	r0, r0, r1
+; CHECK: 	muls	r0, r1, r0
 
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index bb97d97..ac059bd 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 ; CHECK: f1:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ceefabb..74729fd 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -3,8 +3,8 @@
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
 ; CHECK: mvn r0, #-2147483648
-; CHECK: add r0, r1
 ; CHECK: cmp r2, #10
+; CHECK: add r0, r1
 ; CHECK: it  gt
 ; CHECK: movgt r0, r1
         %tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
deleted file mode 100644
index c62fee1..0000000
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; Linear scan does not currently coalesce any two variables that have
-; overlapping live intervals. When two overlapping intervals have the same
-; value, they can be joined though.
-;
-; RUN: llc < %s -march=x86 -regalloc=linearscan | \
-; RUN:   not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
-
-define i64 @test(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 4294967297         ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index a871ea1..38bca28 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 34
+; RUN:     grep {asm-printer} | grep 35
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -30,7 +30,7 @@ cond_true:		; preds = %cond_true, %entry
 	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
 	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
+	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
 	%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64>		; <<2 x i64>> [#uses=2]
 	%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 >		; <<2 x i64>> [#uses=1]
 	%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2		; <<2 x i64>> [#uses=1]
@@ -48,4 +48,4 @@ return:		; preds = %cond_true, %entry
 	ret void
 }
 
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 6f8b89c..24aa5b9 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
 ; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
 
 define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index d1fc70d..7d21b71 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -10,9 +10,10 @@ entry:
   invoke void @raise()
           to label %eh_then unwind label %unwind
 
-unwind:                                           ; preds = %entry
-  %eh_ptr = tail call i8* @llvm.eh.exception()
-  %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+unwind:                                           ; preds = %entry 
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
+              catch i8* @error
+  %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
   %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
   %tmp2 = icmp eq i32 %eh_select, %eh_typeid
   br i1 %tmp2, label %eh_then, label %Unwind
@@ -21,16 +22,11 @@ eh_then:                                          ; preds = %unwind, %entry
   ret void
 
 Unwind:                                           ; preds = %unwind
-  %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
-  unreachable
+  resume { i8*, i32 } %eh_ptr
 }
 
 declare void @raise()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gnat_eh_personality(...)
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index f6db0d0..838a0c3 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 265d968..2e95082 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
deleted file mode 100644
index 704b2ba..0000000
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86
-; RUN: llc < %s -march=x86-64
-
-	%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
-
-define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) {
-entry:
-	%tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null )		; <i8*> [#uses=0]
-	unreachable
-}
-
-declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind 
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
deleted file mode 100644
index 8f4d353..0000000
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of re-materialization} | grep 2
-; rdar://5761454
-
-	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
-
-define  %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind  {
-entry:
-	br i1 true, label %bb43.i, label %bb.i
-
-bb.i:		; preds = %entry
-	ret %struct.quad_struct* null
-
-bb43.i:		; preds = %entry
-	br i1 true, label %CheckOutside.exit40.i, label %bb11.i38.i
-
-bb11.i38.i:		; preds = %bb43.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit40.i:		; preds = %bb43.i
-	br i1 true, label %CheckOutside.exit30.i, label %bb11.i28.i
-
-bb11.i28.i:		; preds = %CheckOutside.exit40.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit30.i:		; preds = %CheckOutside.exit40.i
-	br i1 true, label %CheckOutside.exit20.i, label %bb11.i18.i
-
-bb11.i18.i:		; preds = %CheckOutside.exit30.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit20.i:		; preds = %CheckOutside.exit30.i
-	br i1 true, label %bb34, label %bb11.i8.i
-
-bb11.i8.i:		; preds = %CheckOutside.exit20.i
-	ret %struct.quad_struct* null
-
-bb34:		; preds = %CheckOutside.exit20.i
-	%tmp15.reg2mem.0 = sdiv i32 %size, 2		; <i32> [#uses=7]
-	%tmp85 = sub i32 %center_y, %tmp15.reg2mem.0		; <i32> [#uses=2]
-	%tmp88 = sub i32 %center_x, %tmp15.reg2mem.0		; <i32> [#uses=2]
-	%tmp92 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp99 = add i32 0, %hi_proc		; <i32> [#uses=1]
-	%tmp100 = sdiv i32 %tmp99, 2		; <i32> [#uses=1]
-	%tmp110 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp122 = add i32 %tmp15.reg2mem.0, %center_y		; <i32> [#uses=2]
-	%tmp129 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp147 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	unreachable
-}
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
deleted file mode 100644
index 33d658c..0000000
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan | grep movss | count 1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan -stats |& grep {Number of re-materialization} | grep 1
-
-	%struct..0objc_object = type opaque
-	%struct.OhBoy = type {  }
-	%struct.BooHoo = type { i32 }
-	%struct.objc_selector = type opaque
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.OhBoy*, %struct.objc_selector*, i32, %struct.BooHoo*)* @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]" to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define void @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]"(%struct.OhBoy* %self, %struct.objc_selector* %_cmd, i32 %delta, %struct.BooHoo* %viewingState) nounwind  {
-entry:
-	%tmp19 = load i32* null, align 4		; <i32> [#uses=1]
-	%tmp24 = tail call float bitcast (void (%struct..0objc_object*, ...)* @objc_msgSend_fpret to float (%struct..0objc_object*, %struct.objc_selector*)*)( %struct..0objc_object* null, %struct.objc_selector* null ) nounwind 		; <float> [#uses=2]
-	%tmp30 = icmp sgt i32 %delta, 0		; <i1> [#uses=1]
-	br i1 %tmp30, label %bb33, label %bb87.preheader
-bb33:		; preds = %entry
-	%tmp28 = fadd float 0.000000e+00, %tmp24		; <float> [#uses=1]
-	%tmp35 = fcmp ogt float %tmp28, 1.800000e+01		; <i1> [#uses=1]
-	br i1 %tmp35, label %bb38, label %bb87.preheader
-bb38:		; preds = %bb33
-	%tmp53 = add i32 %tmp19, %delta		; <i32> [#uses=2]
-	br label %bb43
-bb43:		; preds = %bb38
-	store i32 %tmp53, i32* null, align 4
-	ret void
-bb50:		; preds = %bb38
-	%tmp56 = fsub float 1.800000e+01, %tmp24		; <float> [#uses=1]
-	%tmp57 = fcmp ugt float 0.000000e+00, %tmp56		; <i1> [#uses=1]
-	br i1 %tmp57, label %bb64, label %bb87.preheader
-bb64:		; preds = %bb50
-	ret void
-bb87.preheader:		; preds = %bb50, %bb33, %entry
-	%usableDelta.0 = phi i32 [ %delta, %entry ], [ %delta, %bb33 ], [ %tmp53, %bb50 ]		; <i32> [#uses=1]
-	%tmp100 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* null, %struct.objc_selector* null, %struct..0objc_object* null ) nounwind 		; <%struct..0objc_object*> [#uses=2]
-	%tmp106 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null ) nounwind 		; <%struct..0objc_object*> [#uses=0]
-	%umax = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
-	br label %bb108
-bb108:		; preds = %bb108, %bb87.preheader
-	%attachmentIndex.0.reg2mem.0 = phi i32 [ 0, %bb87.preheader ], [ %indvar.next, %bb108 ]		; <i32> [#uses=2]
-	%tmp114 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null, i32 %attachmentIndex.0.reg2mem.0 ) nounwind 		; <%struct..0objc_object*> [#uses=1]
-	%tmp121 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp114, %struct.objc_selector* null, i32 %usableDelta.0 ) nounwind 		; <%struct..0objc_object*> [#uses=0]
-	%indvar.next = add i32 %attachmentIndex.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, %umax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb130, label %bb108
-bb130:		; preds = %bb108
-	ret void
-}
-
-declare %struct..0objc_object* @objc_msgSend(%struct..0objc_object*, %struct.objc_selector*, ...)
-
-declare void @objc_msgSend_fpret(%struct..0objc_object*, ...)
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index e5dda4a..ac167b0 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=linearscan | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5
 ; PR2343
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 0d11546..c068f8a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -2,8 +2,6 @@
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
-declare i8* @llvm.eh.exception() nounwind 
-
 declare i8* @_Znwm(i32)
 
 declare i8* @__cxa_begin_catch(i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 90af387..a6234d3 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 | not grep movsd
 ; RUN: llc < %s -march=x86 | grep movw
 ; RUN: llc < %s -march=x86 | grep addw
-; These transforms are turned off for volatile loads and stores.
+; These transforms are turned off for load volatiles and stores.
 ; Check that they weren't turned off for all loads and stores!
 
 @atomic = global double 0.000000e+00		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 8665282..037559e 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -8,13 +8,13 @@
 
 define i16 @f(i64 %x, double %y) {
 	%b = bitcast i64 %x to double		; <double> [#uses=1]
-	volatile store double %b, double* @atomic ; one processor operation only
-	volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only
+	store volatile double %b, double* @atomic ; one processor operation only
+	store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
 	%b2 = bitcast double %y to i64		; <i64> [#uses=1]
-	volatile store i64 %b2, i64* @anything ; may transform to store of double
-	%l = volatile load i32* @ioport		; must not narrow
+	store volatile i64 %b2, i64* @anything ; may transform to store of double
+	%l = load volatile i32* @ioport		; must not narrow
 	%t = trunc i32 %l to i16		; <i16> [#uses=1]
-	%l2 = volatile load i32* @ioport		; must not narrow
+	%l2 = load volatile i32* @ioport		; must not narrow
 	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
 	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
 	%f = add i16 %t, %t2		; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index 101b3c5..f0d46a0 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movap
 ; PR2715
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 2dc1dea..757f1ff 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
 ; originally from PR2687, but things don't work that way any more.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 511c7b5..6867ae7 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=fast       | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=basic      | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=greedy     | FileCheck %s
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 935c4c5..f35245b 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-	%0 = volatile load i32* @g_407, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32* @g_407, align 4		; <i32> [#uses=1]
 	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
 	%2 = tail call i32 @func_45(i8 zeroext %1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
deleted file mode 100644
index 6c70c5b..0000000
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s | not grep shrl
-; Note: this test is really trying to make sure that the shift
-; returns the right result; shrl is most likely wrong,
-; but if CodeGen starts legitimately using an shrl here,
-; please adjust the test appropriately.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-@.str = internal constant [6 x i8] c"%lld\0A\00"		; <[6 x i8]*> [#uses=1]
-
-define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
-entry:
-	%conv = zext i32 %xx to i64		; <i64> [#uses=1]
-	%tobool = icmp ne i32 %test, 0		; <i1> [#uses=1]
-	%shl = select i1 %tobool, i64 3, i64 0		; <i64> [#uses=1]
-	%x.0 = shl i64 %conv, %shl		; <i64> [#uses=1]
-	ret i64 %x.0
-}
-
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 75e0b8a..435adbb 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
 ; PR3149
 ; Make sure the copy after inline asm is not coalesced away.
 
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 9d24084..3e42553 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -6,6 +6,6 @@ define void @test(<8 x double>* %P, i64* %Q) nounwind {
 	%B = bitcast <8 x double> %A to i512		; <i512> [#uses=1]
 	%C = lshr i512 %B, 448		; <i512> [#uses=1]
 	%D = trunc i512 %C to i64		; <i64> [#uses=1]
-	volatile store i64 %D, i64* %Q
+	store volatile i64 %D, i64* %Q
 	ret void
 }
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
deleted file mode 100644
index a46a20b..0000000
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
-; RUN: grep movss %t | count 2
-; RUN: grep movaps %t | count 2
-; RUN: grep movdqa %t | count 2
-
-define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
-newFuncRoot:
-	%tmp82 = insertelement <4 x float> %wr.2178, float 0.000000e+00, i32 0		; <<4 x float>> [#uses=1]
-	%tmp85 = insertelement <4 x float> %tmp82, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
-	%tmp87 = insertelement <4 x float> %tmp85, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
-	%tmp89 = insertelement <4 x float> %tmp87, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp89, <4 x float>* %tmp89.out
-	ret i1 false
-}
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
deleted file mode 100644
index 951e191..0000000
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=linearscan -stats |& grep virtregrewriter | not grep {stores unfolded}
-; rdar://6682365
-
-; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
-
-	%struct.CAST_KEY = type { [32 x i32], i32 }
-@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32		; <[2 x i32]*> [#uses=6]
-@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32		; <[2 x i32]*> [#uses=5]
-@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32		; <[2 x i32]*> [#uses=4]
-@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32		; <[2 x i32]*> [#uses=5]
-@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32		; <[2 x i32]*> [#uses=0]
-
-define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp {
-bb1.thread:
-	%0 = getelementptr [16 x i32]* null, i32 0, i32 5		; <i32*> [#uses=1]
-	%1 = getelementptr [16 x i32]* null, i32 0, i32 8		; <i32*> [#uses=1]
-	%2 = load i32* null, align 4		; <i32> [#uses=1]
-	%3 = shl i32 %2, 24		; <i32> [#uses=1]
-	%4 = load i32* null, align 4		; <i32> [#uses=1]
-	%5 = shl i32 %4, 16		; <i32> [#uses=1]
-	%6 = load i32* null, align 4		; <i32> [#uses=1]
-	%7 = or i32 %5, %3		; <i32> [#uses=1]
-	%8 = or i32 %7, %6		; <i32> [#uses=1]
-	%9 = or i32 %8, 0		; <i32> [#uses=1]
-	%10 = load i32* null, align 4		; <i32> [#uses=1]
-	%11 = shl i32 %10, 24		; <i32> [#uses=1]
-	%12 = load i32* %0, align 4		; <i32> [#uses=1]
-	%13 = shl i32 %12, 16		; <i32> [#uses=1]
-	%14 = load i32* null, align 4		; <i32> [#uses=1]
-	%15 = or i32 %13, %11		; <i32> [#uses=1]
-	%16 = or i32 %15, %14		; <i32> [#uses=1]
-	%17 = or i32 %16, 0		; <i32> [#uses=1]
-	br label %bb11
-
-bb11:		; preds = %bb11, %bb1.thread
-	%18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
-	%19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
-	%20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
-	%21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
-	%X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ]		; <i32> [#uses=0]
-	%X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ]		; <i32> [#uses=0]
-	%22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21		; <i32*> [#uses=0]
-	%23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18		; <i32*> [#uses=0]
-	%24 = load i32* null, align 4		; <i32> [#uses=1]
-	%25 = xor i32 0, %24		; <i32> [#uses=1]
-	%26 = xor i32 %25, 0		; <i32> [#uses=1]
-	%27 = xor i32 %26, 0		; <i32> [#uses=4]
-	%28 = and i32 %27, 255		; <i32> [#uses=2]
-	%29 = lshr i32 %27, 8		; <i32> [#uses=1]
-	%30 = and i32 %29, 255		; <i32> [#uses=2]
-	%31 = lshr i32 %27, 16		; <i32> [#uses=1]
-	%32 = and i32 %31, 255		; <i32> [#uses=1]
-	%33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28		; <i32*> [#uses=1]
-	%34 = load i32* %33, align 4		; <i32> [#uses=2]
-	%35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30		; <i32*> [#uses=1]
-	%36 = load i32* %35, align 4		; <i32> [#uses=2]
-	%37 = xor i32 %34, 0		; <i32> [#uses=1]
-	%38 = xor i32 %37, %36		; <i32> [#uses=1]
-	%39 = xor i32 %38, 0		; <i32> [#uses=1]
-	%40 = xor i32 %39, 0		; <i32> [#uses=1]
-	%41 = xor i32 %40, 0		; <i32> [#uses=3]
-	%42 = lshr i32 %41, 8		; <i32> [#uses=1]
-	%43 = and i32 %42, 255		; <i32> [#uses=2]
-	%44 = lshr i32 %41, 16		; <i32> [#uses=1]
-	%45 = and i32 %44, 255		; <i32> [#uses=1]
-	%46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43		; <i32*> [#uses=1]
-	%47 = load i32* %46, align 4		; <i32> [#uses=1]
-	%48 = load i32* null, align 4		; <i32> [#uses=1]
-	%49 = xor i32 %47, 0		; <i32> [#uses=1]
-	%50 = xor i32 %49, %48		; <i32> [#uses=1]
-	%51 = xor i32 %50, 0		; <i32> [#uses=1]
-	%52 = xor i32 %51, 0		; <i32> [#uses=1]
-	%53 = xor i32 %52, 0		; <i32> [#uses=2]
-	%54 = and i32 %53, 255		; <i32> [#uses=1]
-	%55 = lshr i32 %53, 24		; <i32> [#uses=1]
-	%56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55		; <i32*> [#uses=1]
-	%57 = load i32* %56, align 4		; <i32> [#uses=1]
-	%58 = xor i32 0, %57		; <i32> [#uses=1]
-	%59 = xor i32 %58, 0		; <i32> [#uses=1]
-	%60 = xor i32 %59, 0		; <i32> [#uses=1]
-	store i32 %60, i32* null, align 4
-	%61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
-	%62 = load i32* %61, align 4		; <i32> [#uses=1]
-	%63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54		; <i32*> [#uses=1]
-	%64 = load i32* %63, align 4		; <i32> [#uses=1]
-	%65 = xor i32 0, %64		; <i32> [#uses=1]
-	%66 = xor i32 %65, 0		; <i32> [#uses=1]
-	store i32 %66, i32* null, align 4
-	%67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45		; <i32*> [#uses=1]
-	%68 = load i32* %67, align 4		; <i32> [#uses=1]
-	%69 = xor i32 %36, %34		; <i32> [#uses=1]
-	%70 = xor i32 %69, 0		; <i32> [#uses=1]
-	%71 = xor i32 %70, %68		; <i32> [#uses=1]
-	%72 = xor i32 %71, 0		; <i32> [#uses=1]
-	store i32 %72, i32* null, align 4
-	%73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32		; <i32*> [#uses=1]
-	%74 = load i32* %73, align 4		; <i32> [#uses=2]
-	%75 = load i32* null, align 4		; <i32> [#uses=1]
-	%76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43		; <i32*> [#uses=1]
-	%77 = load i32* %76, align 4		; <i32> [#uses=1]
-	%78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0		; <i32*> [#uses=1]
-	%79 = load i32* %78, align 4		; <i32> [#uses=1]
-	%80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30		; <i32*> [#uses=1]
-	%81 = load i32* %80, align 4		; <i32> [#uses=2]
-	%82 = xor i32 %75, %74		; <i32> [#uses=1]
-	%83 = xor i32 %82, %77		; <i32> [#uses=1]
-	%84 = xor i32 %83, %79		; <i32> [#uses=1]
-	%85 = xor i32 %84, %81		; <i32> [#uses=1]
-	store i32 %85, i32* null, align 4
-	%86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28		; <i32*> [#uses=1]
-	%87 = load i32* %86, align 4		; <i32> [#uses=1]
-	%88 = xor i32 %74, %41		; <i32> [#uses=1]
-	%89 = xor i32 %88, %87		; <i32> [#uses=1]
-	%90 = xor i32 %89, 0		; <i32> [#uses=1]
-	%91 = xor i32 %90, %81		; <i32> [#uses=1]
-	%92 = xor i32 %91, 0		; <i32> [#uses=3]
-	%93 = lshr i32 %92, 16		; <i32> [#uses=1]
-	%94 = and i32 %93, 255		; <i32> [#uses=1]
-	store i32 %94, i32* null, align 4
-	%95 = lshr i32 %92, 24		; <i32> [#uses=2]
-	%96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95		; <i32*> [#uses=1]
-	%97 = load i32* %96, align 4		; <i32> [#uses=1]
-	%98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=1]
-	%99 = load i32* %98, align 4		; <i32> [#uses=1]
-	%100 = load i32* null, align 4		; <i32> [#uses=0]
-	%101 = xor i32 %97, 0		; <i32> [#uses=1]
-	%102 = xor i32 %101, %99		; <i32> [#uses=1]
-	%103 = xor i32 %102, 0		; <i32> [#uses=1]
-	%104 = xor i32 %103, 0		; <i32> [#uses=0]
-	store i32 0, i32* null, align 4
-	%105 = xor i32 0, %27		; <i32> [#uses=1]
-	%106 = xor i32 %105, 0		; <i32> [#uses=1]
-	%107 = xor i32 %106, 0		; <i32> [#uses=1]
-	%108 = xor i32 %107, 0		; <i32> [#uses=1]
-	%109 = xor i32 %108, %62		; <i32> [#uses=3]
-	%110 = and i32 %109, 255		; <i32> [#uses=1]
-	%111 = lshr i32 %109, 16		; <i32> [#uses=1]
-	%112 = and i32 %111, 255		; <i32> [#uses=1]
-	%113 = lshr i32 %109, 24		; <i32> [#uses=3]
-	store i32 %113, i32* %1, align 4
-	%114 = load i32* null, align 4		; <i32> [#uses=1]
-	%115 = xor i32 0, %114		; <i32> [#uses=1]
-	%116 = xor i32 %115, 0		; <i32> [#uses=1]
-	%117 = xor i32 %116, 0		; <i32> [#uses=1]
-	%K.0.sum42 = or i32 0, 12		; <i32> [#uses=1]
-	%118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42		; <i32*> [#uses=1]
-	store i32 %117, i32* %118, align 4
-	%119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=0]
-	store i32 0, i32* null, align 4
-	%120 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113		; <i32*> [#uses=1]
-	%121 = load i32* %120, align 4		; <i32> [#uses=1]
-	%122 = xor i32 0, %121		; <i32> [#uses=1]
-	store i32 %122, i32* null, align 4
-	%123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
-	%124 = load i32* %123, align 4		; <i32> [#uses=1]
-	%125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95		; <i32*> [#uses=1]
-	%126 = load i32* %125, align 4		; <i32> [#uses=1]
-	%127 = xor i32 0, %124		; <i32> [#uses=1]
-	%128 = xor i32 %127, 0		; <i32> [#uses=1]
-	%129 = xor i32 %128, %126		; <i32> [#uses=1]
-	%130 = xor i32 %129, 0		; <i32> [#uses=1]
-	store i32 %130, i32* null, align 4
-	br label %bb11
-}
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 90dabb8..8bbdb0e 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -9,30 +9,30 @@
 @X = external global i64		; <i64*> [#uses=25]
 
 define fastcc i64 @foo() nounwind {
-	%tmp = volatile load i64* @X		; <i64> [#uses=7]
-	%tmp1 = volatile load i64* @X		; <i64> [#uses=5]
-	%tmp2 = volatile load i64* @X		; <i64> [#uses=3]
-	%tmp3 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp4 = volatile load i64* @X		; <i64> [#uses=5]
-	%tmp5 = volatile load i64* @X		; <i64> [#uses=3]
-	%tmp6 = volatile load i64* @X		; <i64> [#uses=2]
-	%tmp7 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp8 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp9 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp10 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp11 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp12 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp13 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp14 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp15 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp16 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp17 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp18 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp19 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp20 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp21 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp22 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp23 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp = load volatile i64* @X		; <i64> [#uses=7]
+	%tmp1 = load volatile i64* @X		; <i64> [#uses=5]
+	%tmp2 = load volatile i64* @X		; <i64> [#uses=3]
+	%tmp3 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp4 = load volatile i64* @X		; <i64> [#uses=5]
+	%tmp5 = load volatile i64* @X		; <i64> [#uses=3]
+	%tmp6 = load volatile i64* @X		; <i64> [#uses=2]
+	%tmp7 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp8 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp9 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp10 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp11 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp12 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp13 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp14 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp15 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp16 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp17 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp18 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp19 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp20 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp21 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp22 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp23 = load volatile i64* @X		; <i64> [#uses=1]
 	%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8)		; <i64> [#uses=1]
 	%tmp25 = add i64 %tmp6, %tmp5		; <i64> [#uses=1]
 	%tmp26 = add i64 %tmp25, %tmp4		; <i64> [#uses=1]
@@ -229,7 +229,7 @@ define fastcc i64 @foo() nounwind {
 	%tmp217 = add i64 %tmp205, %tmp215		; <i64> [#uses=1]
 	%tmp218 = add i64 %tmp217, %tmp211		; <i64> [#uses=1]
 	%tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23)		; <i64> [#uses=2]
-	volatile store i64 %tmp219, i64* @X, align 8
+	store volatile i64 %tmp219, i64* @X, align 8
 	%tmp220 = add i64 %tmp203, %tmp190		; <i64> [#uses=1]
 	%tmp221 = add i64 %tmp220, %tmp216		; <i64> [#uses=1]
 	%tmp222 = add i64 %tmp219, %tmp177		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 620e0f3..9f5a8c5 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,11 +1,10 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
+; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032
 
 ; CHECK: pextrw $14
 ; CHECK-NEXT: shrl $8
-; CHECK-NEXT: (%ebp)
 ; CHECK-NEXT: pinsrw
 
 define void @update(i8** %args_list) nounwind {
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index a5e28c0..c2cd89c 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@ entry:
 	br label %bb
 
 bb:		; preds = %bb.i, %bb, %entry
-	%2 = volatile load i32* @g_9, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32* @g_9, align 4		; <i32> [#uses=2]
 	%3 = icmp sgt i32 %2, 1		; <i1> [#uses=1]
 	%4 = and i1 %3, %1		; <i1> [#uses=1]
 	br i1 %4, label %bb.i, label %bb
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 12bd285..1259cf4 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq    $40, %rsp
 ; CHECK: movaps  %xmm8, (%rsp)
 ; CHECK: movaps  %xmm7, 16(%rsp)
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5c51480..5f5d5cc 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
 ; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 2
+; RUN: grep movzbl %t1 | count 1
 ; RUN: grep movd %t1 | count 4
 
 define <4 x i16> @a(i32* %x1) nounwind {
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index 07ef53e..66caedf 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -mattr=+mmx | grep movd | count 2
 
 define i64 @a(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
deleted file mode 100644
index 3e5bd34..0000000
--- a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
-; PR4552
-
-target triple = "i386-pc-linux-gnu"
-@g_8 = internal global i32 0		; <i32*> [#uses=1]
-@g_72 = internal global i32 0		; <i32*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
-entry:
-	%g_72.promoted = load i32* @g_72		; <i32> [#uses=1]
-	%g_8.promoted = load i32* @g_8		; <i32> [#uses=1]
-	br label %bb
-
-bb:		; preds = %func_40.exit, %entry
-	%g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
-	%g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
-	%retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8		; <i8> [#uses=2]
-	%0 = trunc i32 %g_72.tmp.1 to i8		; <i8> [#uses=2]
-	%1 = mul i8 %retval12.i4.i.i, %0		; <i8> [#uses=1]
-	%2 = icmp eq i8 %1, 0		; <i1> [#uses=1]
-	br i1 %2, label %bb2.i.i, label %bb.i.i
-
-bb.i.i:		; preds = %bb
-	%3 = sext i8 %0 to i32		; <i32> [#uses=1]
-	%4 = and i32 %3, 50295		; <i32> [#uses=1]
-	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb2.i.i, label %func_55.exit.i
-
-bb2.i.i:		; preds = %bb.i.i, %bb
-	br label %func_55.exit.i
-
-func_55.exit.i:		; preds = %bb2.i.i, %bb.i.i
-	%g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
-	%6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
-	%7 = trunc i32 %6 to i8		; <i8> [#uses=2]
-	%8 = mul i8 %7, %retval12.i4.i.i		; <i8> [#uses=1]
-	%9 = icmp eq i8 %8, 0		; <i1> [#uses=1]
-	br i1 %9, label %bb2.i4.i, label %bb.i3.i
-
-bb.i3.i:		; preds = %func_55.exit.i
-	%10 = sext i8 %7 to i32		; <i32> [#uses=1]
-	%11 = and i32 %10, 50295		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
-	br i1 %12, label %bb2.i4.i, label %func_40.exit
-
-bb2.i4.i:		; preds = %bb.i3.i, %func_55.exit.i
-	br label %func_40.exit
-
-func_40.exit:		; preds = %bb2.i4.i, %bb.i3.i
-	%g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ]		; <i32> [#uses=1]
-	%phitmp = icmp sgt i32 %g_8.tmp.1, 0		; <i1> [#uses=1]
-	%g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1		; <i32> [#uses=1]
-	br label %bb
-}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 790fd88..410a42a 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,18 +41,18 @@ bb3:                                              ; preds = %bb2, %bb
   br i1 undef, label %bb5, label %bb4
 
 bb4:                                              ; preds = %bb3
-  %17 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %17 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   br label %bb5
 
 bb5:                                              ; preds = %bb4, %bb3
-  %18 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %18 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   %19 = sext i8 undef to i16                      ; <i16> [#uses=1]
   %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
   br i1 undef, label %return, label %bb6.preheader
 
 bb6.preheader:                                    ; preds = %bb5
   %21 = sext i8 %p_52 to i32                      ; <i32> [#uses=1]
-  %22 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %22 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
   unreachable
 
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index f6ac2ba..d4a74c9 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
 ; Check that lowered argumens do not overwrite the return address before it is moved.
 ; Bug 6225
 ;
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 69787c7..5372bc5 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,32 +1,35 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
 ; There are no MMX operations here, so we use XMM or i64.
 
+; CHECK: ti8
 define void @ti8(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <8 x i8>
         %tmp2 = bitcast double %b to <8 x i8>
         %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK:  paddb %xmm1, %xmm0
+; CHECK:  paddw
         store <8 x i8> %tmp3, <8 x i8>* null
         ret void
 }
 
+; CHECK: ti16
 define void @ti16(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <4 x i16>
         %tmp2 = bitcast double %b to <4 x i16>
         %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK:  paddw %xmm1, %xmm0
+; CHECK:  paddd
         store <4 x i16> %tmp3, <4 x i16>* null
         ret void
 }
 
+; CHECK: ti32
 define void @ti32(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <2 x i32>
         %tmp2 = bitcast double %b to <2 x i32>
         %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK:  paddd %xmm1, %xmm0
+; CHECK:  paddq
         store <2 x i32> %tmp3, <2 x i32>* null
         ret void
 }
@@ -55,6 +58,7 @@ entry:
         ret void
 }
 
+; CHECK: ti16a
 define void @ti16a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
@@ -66,6 +70,7 @@ entry:
         ret void
 }
 
+; CHECK: ti32a
 define void @ti32a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
@@ -77,6 +82,7 @@ entry:
         ret void
 }
 
+; CHECK: ti64a
 define void @ti64a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index 7af58dc..cbf5502 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -30,14 +30,16 @@ invoke.cont:                                      ; preds = %entry
   br label %finally
 
 terminate.handler:                                ; preds = %match.end
-  %exc = call i8* @llvm.eh.exception()            ; <i8*> [#uses=1]
-  %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           cleanup
   call void @_ZSt9terminatev() noreturn nounwind
   unreachable
 
 try.handler:                                      ; preds = %entry
-  %exc1 = call i8* @llvm.eh.exception()           ; <i8*> [#uses=3]
-  %selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; <i32> [#uses=1]
+  %exc1.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           catch i8* null
+  %exc1 = extractvalue { i8*, i32 } %exc1.ptr, 0
+  %selector = extractvalue { i8*, i32 } %exc1.ptr, 1
   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; <i32> [#uses=1]
   %3 = icmp eq i32 %selector, %2                  ; <i1> [#uses=1]
   br i1 %3, label %match, label %catch.next
@@ -55,9 +57,10 @@ invoke.cont2:                                     ; preds = %match
   br label %match.end
 
 match.handler:                                    ; preds = %match
-  %exc3 = call i8* @llvm.eh.exception()           ; <i8*> [#uses=2]
-  %7 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) ; <i32> [#uses=0]
-  store i8* %exc3, i8** %_rethrow
+  %exc3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           cleanup
+  %7 = extractvalue { i8*, i32 } %exc3, 0
+  store i8* %7, i8** %_rethrow
   store i32 2, i32* %cleanup.dst
   br label %match.end
 
@@ -124,10 +127,6 @@ declare void @_Z6throwsv() ssp
 
 declare i32 @__gxx_personality_v0(...)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_ZSt9terminatev()
 
 declare void @_Unwind_Resume_or_Rethrow(i8*)
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 5accfd7..e0c2c6c 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=generic | FileCheck %s
 ; PR6941
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
deleted file mode 100644
index 2ba12df..0000000
--- a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s
-; Use DW_FORM_addr for DW_AT_entry_pc.
-; Radar 8094785
-
-; CHECK:	.byte	17                      ## DW_TAG_compile_unit
-; CHECK-NEXT:	.byte	1                       ## DW_CHILDREN_yes
-; CHECK-NEXT:	.byte	37                      ## DW_AT_producer
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	19                      ## DW_AT_language
-; CHECK-NEXT:	.byte	5                       ## DW_FORM_data2
-; CHECK-NEXT:	.byte	3                       ## DW_AT_name
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	82                      ## DW_AT_entry_pc
-; CHECK-NEXT:	.byte	1                       ## DW_FORM_addr
-; CHECK-NEXT:	.byte	16                      ## DW_AT_stmt_list
-; CHECK-NEXT:	.byte	6                       ## DW_FORM_data4
-; CHECK-NEXT:	.byte	27                      ## DW_AT_comp_dir
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	225                     ## DW_AT_APPLE_optimized
-
-%struct.a = type { i32, %struct.a* }
-
-@ret = common global i32 0                        ; <i32*> [#uses=2]
-
-define void @foo(i32 %x) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28
-  store i32 %x, i32* @ret, align 4, !dbg !29
-  ret void, !dbg !31
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i8* @bar(%struct.a* %b) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32
-  %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 8, !dbg !33            ; <i32> [#uses=1]
-  tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33
-  %2 = bitcast %struct.a* %b to i8*, !dbg !35     ; <i8*> [#uses=1]
-  ret i8* %2, !dbg !35
-}
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-  %e = alloca %struct.a, align 8                  ; <%struct.a*> [#uses=4]
-  call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36
-  call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36
-  call void @llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37
-  %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1]
-  store i32 4, i32* %0, align 8, !dbg !38
-  %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1]
-  store %struct.a* %e, %struct.a** %1, align 8, !dbg !39
-  %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0]
-  %3 = load i32* @ret, align 4, !dbg !41          ; <i32> [#uses=1]
-  ret i32 %3, !dbg !41
-}
-
-!llvm.dbg.sp = !{!0, !6, !15}
-!llvm.dbg.lv.foo = !{!21}
-!llvm.dbg.lv.bar = !{!22}
-!llvm.dbg.lv.main = !{!23, !24, !25}
-!llvm.dbg.gv = !{!27}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!17 = metadata !{metadata !5, metadata !5, metadata !18}
-!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ]
-!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ]
-!28 = metadata !{i32 33, i32 0, metadata !0, null}
-!29 = metadata !{i32 35, i32 0, metadata !30, null}
-!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 36, i32 0, metadata !30, null}
-!32 = metadata !{i32 38, i32 0, metadata !6, null}
-!33 = metadata !{i32 39, i32 0, metadata !34, null}
-!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 40, i32 0, metadata !34, null}
-!36 = metadata !{i32 43, i32 0, metadata !15, null}
-!37 = metadata !{i32 44, i32 0, metadata !26, null}
-!38 = metadata !{i32 45, i32 0, metadata !26, null}
-!39 = metadata !{i32 46, i32 0, metadata !26, null}
-!40 = metadata !{i32 48, i32 0, metadata !26, null}
-!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
index 98a0887..61f527b 100644
--- a/test/CodeGen/X86/2010-08-04-MingWCrash.ll
+++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -10,14 +10,15 @@ bb1:
   ret void
 
 lpad:
-  %exn = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind
+  %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           catch i8* null
+  %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+  %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
   %ehspec.fails = icmp slt i32 %eh.selector, 0
   br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup
 
 cleanup:
-  tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind
-  unreachable
+  resume { i8*, i32 } %exn.ptr
 
 ehspec.unexpected:
   tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind
@@ -26,12 +27,8 @@ ehspec.unexpected:
 
 declare noalias i8* @malloc()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_Unwind_Resume_or_Rethrow(i8*)
 
 declare void @__cxa_call_unexpected(i8*)
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
index d98ef14..b3cc35d 100644
--- a/test/CodeGen/X86/2010-08-10-DbgConstant.ll
+++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -1,6 +1,6 @@
-; RUN: llc  -march=x86 -O0 < %s | FileCheck %s
+; RUN: llc  -mtriple=i686-linux -O0 < %s | FileCheck %s
 ; CHECK: DW_TAG_constant
-; CHECK-NEXT: ascii	 "ro"                   #{{#?}} DW_AT_name
+; CHECK-NEXT: .long .Lstring3 #{{#?}} DW_AT_name
 
 define void @foo() nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 7f13411..166dcf2 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 ; Check debug info for variable z_s
-;CHECK:       .ascii   "z_s"                  ## DW_AT_name
-;CHECK-NEXT:  .byte   0
+;CHECK: .long Lset13
 ;CHECK-NEXT:  ## DW_AT_decl_file
 ;CHECK-NEXT:  ## DW_AT_decl_line
 ;CHECK-NEXT:  ## DW_AT_type
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 72c79d2..4d5f8d7 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,22 +3,28 @@
 ; PR5329
 
 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT: construct_3
-; CHECK-DEFAULT: construct_2
-; CHECK-DEFAULT: construct_1
+; CHECK-DEFAULT  .section        .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_1
+; CHECK-DEFAULT: .section        .ctors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_2
+; CHECK-DEFAULT: .section        .ctors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_3
 
-; CHECK-DARWIN: construct_1
-; CHECK-DARWIN: construct_2
-; CHECK-DARWIN: construct_3
+; CHECK-DARWIN: .long _construct_1
+; CHECK-DARWIN-NEXT: .long _construct_2
+; CHECK-DARWIN-NEXT: .long _construct_3
 
 @llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }]
-; CHECK-DEFAULT: destruct_3
-; CHECK-DEFAULT: destruct_2
-; CHECK-DEFAULT: destruct_1
+; CHECK-DEFAULT: .section        .dtors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_1
+; CHECK-DEFAULT: .section        .dtors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_2
+; CHECK-DEFAULT: .section        .dtors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_3
 
-; CHECK-DARWIN: destruct_1
-; CHECK-DARWIN: destruct_2
-; CHECK-DARWIN: destruct_3
+; CHECK-DARWIN:      .long _destruct_1
+; CHECK-DARWIN-NEXT: .long _destruct_2
+; CHECK-DARWIN-NEXT: .long _destruct_3
 
 declare void @construct_1()
 declare void @construct_2()
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
new file mode 100644
index 0000000..8c09d97
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=x86 -fast-isel -mattr=+sse < %s | FileCheck %s
+; <rdar://problem/10215997>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+define void @vectortest() nounwind ssp {
+entry:
+  %p1 = alloca <4 x float>, align 16
+  %p2 = alloca <4 x float>, align 16
+  %p3 = alloca <4 x float>, align 16
+  %p4 = alloca <4 x float>, align 16
+  %p5 = alloca <4 x float>, align 16
+  store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>, <4 x float>* %p1, align 16
+  store <4 x float> <float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000>, <4 x float>* %p2, align 16
+  store <4 x float> <float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000>, <4 x float>* %p3, align 16
+  store <4 x float> <float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000>, <4 x float>* %p4, align 16
+  store <4 x float> <float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000>, <4 x float>* %p5, align 16
+  %0 = load <4 x float>* %p1, align 16
+  %1 = load <4 x float>* %p2, align 16
+  %2 = load <4 x float>* %p3, align 16
+  %3 = load <4 x float>* %p4, align 16
+  %4 = load <4 x float>* %p5, align 16
+; CHECK:      movaps {{%xmm[0-7]}}, (%esp)
+; CHECK-NEXT: calll _dovectortest 
+  call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
+  ret void
+}
+
+declare void @dovectortest(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
new file mode 100644
index 0000000..a720753
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.anon = type { <2 x i8> }
+
+@i = global <2 x i8> <i8 150, i8 100>, align 8
+@j = global <2 x i8> <i8 10, i8 13>, align 8
+@res = common global %union.anon zeroinitializer, align 8
+
+; Make sure we load the constants i and j starting offset zero.
+; Also make sure that we sign-extend it.
+; Based on /gcc-4_2-testsuite/src/gcc.c-torture/execute/pr23135.c
+
+; CHECK: main
+define i32 @main() nounwind uwtable {
+entry:
+; CHECK: movsbq  j(%rip), %
+; CHECK: movsbq  i(%rip), %
+  %0 = load <2 x i8>* @i, align 8
+  %1 = load <2 x i8>* @j, align 8
+  %div = sdiv <2 x i8> %1, %0
+  store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon* @res, i32 0, i32 0), align 8
+  ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
new file mode 100644
index 0000000..e08c5b2
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when legalizng vselect and vsetcc and that
+; we are able to generate vector blend instructions.
+
+; CHECK: simple_widen
+; CHECK: blend
+; CHECK: ret
+define void @simple_widen() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: complex_inreg_work
+; CHECK: blend
+; CHECK: ret
+
+define void @complex_inreg_work() {
+entry:
+  %0 = fcmp oeq <2 x float> undef, undef
+  %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+  store <2 x float> %1, <2 x float>* undef
+  ret void
+}
+
+; CHECK: zero_test
+; CHECK: blend
+; CHECK: ret
+
+define void @zero_test() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: full_test
+; CHECK: blend
+; CHECK: ret
+
+define void @full_test() {
+ entry:
+   %Cy300 = alloca <4 x float>
+   %Cy11a = alloca <2 x float>
+   %Cy118 = alloca <2 x float>
+   %Cy119 = alloca <2 x float>
+   br label %B1
+
+ B1:                                               ; preds = %entry
+   %0 = load <2 x float>* %Cy119
+   %1 = fptosi <2 x float> %0 to <2 x i32>
+   %2 = sitofp <2 x i32> %1 to <2 x float>
+   %3 = fcmp ogt <2 x float> %0, zeroinitializer
+   %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
+   %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
+   %6 = fcmp oeq <2 x float> %2, %0
+   %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
+   store <2 x float> %7, <2 x float>* %Cy118
+   %8 = load <2 x float>* %Cy118
+   store <2 x float> %8, <2 x float>* %Cy11a
+   ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
new file mode 100644
index 0000000..2fe645b
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that a <4 x float> compare is generated and that we are
+; not stuck in an endless loop.
+
+; CHECK: cmp_2_floats
+; CHECK: cmpordps
+; CHECK: ret
+
+define void @cmp_2_floats() {
+entry:
+  %0 = fcmp oeq <2 x float> undef, undef
+  %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+  store <2 x float> %1, <2 x float>* undef
+  ret void
+}
+
+; CHECK: cmp_2_doubles
+; CHECK: cmpordpd
+; CHECK: blendvpd
+; CHECK: ret
+define void @cmp_2_doubles() {
+entry:
+  %0 = fcmp oeq <2 x double> undef, undef
+  %1 = select <2 x i1> %0, <2 x double> undef, <2 x double> undef
+  store <2 x double> %1, <2 x double>* undef
+  ret void
+}
+
+; CHECK: mp_11193
+; CHECK: psraw   $15
+; CHECK: ret
+define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET)
+nounwind {
+allocas:
+  %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  %t = extractelement <8 x i1> %bincmp, i32 0
+  %ft = sitofp i1 %t to float
+  %pp = bitcast <8 x float>* %RET to float*
+  store float %ft, float* %pp
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
new file mode 100644
index 0000000..6e83f67
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: ltstore
+;CHECK: movq
+;CHECK: movq
+;CHECK: ret
+define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
+entry:
+  %in = load <4 x i32>* %pA
+  %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  store <2 x i32> %j, <2 x i32>* %pB
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-30-padd.ll b/test/CodeGen/X86/2011-10-30-padd.ll
new file mode 100644
index 0000000..180ca15
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-30-padd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+
+;CHECK: addXX_test
+;CHECK: padd
+;CHECK: ret
+
+
+define <16 x i8> @addXX_test(<16 x i8> %a) {
+      %b = add <16 x i8> %a, %a
+      ret <16 x i8> %b
+}
+
+;CHECK: instcombine_test
+;CHECK: padd
+;CHECK: ret
+define <16 x i8> @instcombine_test(<16 x i8> %a) {
+  %b = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %b
+}
+
diff --git a/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
new file mode 100644
index 0000000..d316470
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; We don't really care what this outputs; just make sure it's somewhat sane.
+; CHECK: legalize_test
+; CHECK: vmovups
+define void @legalize_test(i32 %x, <8 x i32>* %p) nounwind {
+entry:
+  %t1 = insertelement <8 x i32> <i32 undef, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>, i32 %x, i32 0
+  %t2 = shufflevector <8 x i32> %t1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %int2float = sitofp <8 x i32> %t2 to <8 x float>
+  %blendAsInt.i821 = bitcast <8 x float> %int2float to <8 x i32>
+  store <8 x i32> %blendAsInt.i821, <8 x i32>* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
new file mode 100644
index 0000000..8174109
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11"
+
+; This test would create a vpand %ymm instruction that is only legal in AVX2.
+; CHECK-NOT: vpand %ymm
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+
+define void @ShadeTile() nounwind {
+allocas:
+  br i1 undef, label %if_then, label %if_else
+
+if_then:                                          ; preds = %allocas
+  unreachable
+
+if_else:                                          ; preds = %allocas
+  br i1 undef, label %for_loop156.lr.ph, label %if_exit
+
+for_loop156.lr.ph:                                ; preds = %if_else
+  %val_6.i21244 = load i16* undef, align 2
+  %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6
+  %val_7.i21248 = load i16* undef, align 2
+  %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7
+  %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32>
+  %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744>
+  %bitop8.i20209 = and <8 x i32> %uint2uint32.i20206, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+  %bitop12.i20211 = lshr <8 x i32> %bitop5.i20208, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+  %binop13.i20212 = add <8 x i32> %bitop12.i20211, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+  %bitop15.i20213 = shl <8 x i32> %binop13.i20212, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+  %bitop17.i20214 = shl <8 x i32> %bitop8.i20209, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+  %bitop20.i20215 = or <8 x i32> undef, %bitop15.i20213
+  %bitop22.i20216 = or <8 x i32> %bitop20.i20215, %bitop17.i20214
+  %int_to_float_bitcast.i.i.i20217 = bitcast <8 x i32> %bitop22.i20216 to <8 x float>
+  %binop401 = fmul <8 x float> undef, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  %binop402 = fadd <8 x float> %binop401, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+  %binop403 = fmul <8 x float> zeroinitializer, %binop402
+  %binop406 = fmul <8 x float> %int_to_float_bitcast.i.i.i20217, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  %binop407 = fadd <8 x float> %binop406, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+  %binop408 = fmul <8 x float> zeroinitializer, %binop407
+  %binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef
+  %val_4.i21290 = load i16* undef, align 2
+  %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4
+  %val_5.i21294 = load i16* undef, align 2
+  %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5
+  %val_6.i21298 = load i16* undef, align 2
+  %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6
+  %ptr_7.i21301 = inttoptr i64 undef to i16*
+  %val_7.i21302 = load i16* %ptr_7.i21301, align 2
+  %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7
+  %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32>
+  %structelement561 = load i8** undef, align 8
+  %ptr2int563 = ptrtoint i8* %structelement561 to i64
+  %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7
+  %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer
+  %val_5.i21509 = load i8* null, align 1
+  %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5
+  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
+  %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7
+  %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8*
+  %val_7.i21517 = load i8* %ptr_7.i21516, align 1
+  %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7
+  %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float>
+  %binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  br i1 undef, label %for_loop594.lr.ph, label %for_exit595
+
+if_exit:                                          ; preds = %if_else
+  ret void
+
+for_loop594.lr.ph:                                ; preds = %for_loop156.lr.ph
+  %bitop8.i20221 = and <8 x i32> %uint2uint32.i20218, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+  br i1 undef, label %cif_test_all730, label %cif_mask_mixed1552
+
+for_exit595:                                      ; preds = %for_loop156.lr.ph
+  unreachable
+
+cif_test_all730:                                  ; preds = %for_loop594.lr.ph
+  %binop11.i20545 = fmul <8 x float> %binop408, zeroinitializer
+  %binop12.i20546 = fadd <8 x float> undef, %binop11.i20545
+  %binop15.i20547 = fmul <8 x float> %binop411, undef
+  %binop16.i20548 = fadd <8 x float> %binop12.i20546, %binop15.i20547
+  %bincmp774 = fcmp ogt <8 x float> %binop16.i20548, zeroinitializer
+  %val_to_boolvec32775 = sext <8 x i1> %bincmp774 to <8 x i32>
+  %floatmask.i20549 = bitcast <8 x i32> %val_to_boolvec32775 to <8 x float>
+  %v.i20550 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i20549) nounwind readnone
+  %cond = icmp eq i32 %v.i20550, 255
+  br i1 %cond, label %cif_test_all794, label %cif_test_mixed
+
+cif_test_all794:                                  ; preds = %cif_test_all730
+  %binop.i20572 = fmul <8 x float> %binop403, undef
+  unreachable
+
+cif_test_mixed:                                   ; preds = %cif_test_all730
+  %binop1207 = fmul <8 x float> %binop.i20246, undef
+  unreachable
+
+cif_mask_mixed1552:                               ; preds = %for_loop594.lr.ph
+  unreachable
+}
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
new file mode 100644
index 0000000..0a949eb
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "x86_64-apple-macosx10.6.6"
+
+; Test that the order of operands is correct
+; CHECK: select_func
+; CHECK: pblendvb        %xmm1, %xmm2
+; CHECK: ret
+
+define void @select_func() {
+entry:
+  %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
+  %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
+  %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
+  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
+  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
+  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
+  store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
new file mode 100644
index 0000000..fcaabdd
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11494
+
+define void @test(<4 x i32>* nocapture %p) nounwind {
+  ; CHECK: test:
+  ; CHECK: vpxor %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
+  ; CHECK-NEXT: vmovdqu	%xmm0, (%rdi)
+  ; CHECK-NEXT: ret
+  %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  store <4 x i32> %c, <4 x i32>* %p, align 1
+  ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
new file mode 100644
index 0000000..7a4126f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR11495
+
+; CHECK: 1311768467463790320
+@v = global <2 x float> bitcast (<1 x i64> <i64 1311768467463790320> to <2 x float>), align 8
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
new file mode 100644
index 0000000..1561784
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mattr=+avx
+; Various missing patterns causing crashes.
+; rdar://10538793
+
+define void @t1() nounwind {
+entry:
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %t1.exit, %entry
+  br i1 false, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  br i1 undef, label %0, label %t1.exit
+
+; <label>:0                                       ; preds = %loop
+  %1 = load <16 x i32> addrspace(1)* undef, align 64
+  %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  br label %t1.exit
+
+t1.exit:                                 ; preds = %0, %loop
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  ret void
+}
+
+define void @t2() nounwind {
+  br i1 undef, label %1, label %4
+
+; <label>:1                                       ; preds = %0
+  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
+  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+  br label %4
+
+; <label>:4                                       ; preds = %1, %0
+  ret void
+}
+
+define void @t3() nounwind {
+entry:
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %t2.exit, %entry
+  br i1 false, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  br i1 undef, label %0, label %t2.exit
+
+; <label>:0                                       ; preds = %loop
+  %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
+  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+  br label %t2.exit
+
+t2.exit:                                 ; preds = %0, %loop
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  ret void
+}
+
+define <3 x i64> @t4() nounwind {
+entry:
+  %0 = load <2 x i64> addrspace(1)* undef, align 16
+  %1 = extractelement <2 x i64> %0, i32 0
+  %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
+  ret <3 x i64> %2
+}
+
+define void @t5() nounwind {
+entry:
+  %0 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %1 = shufflevector <8 x i64> <i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 0, i64 0, i64 0>, <8 x i64> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 8, i32 5, i32 6, i32 7>
+  store <8 x i64> %1, <8 x i64> addrspace(1)* undef, align 64
+
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
new file mode 100644
index 0000000..6f9188c
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; Test case for r146671
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
+  ; Make sure operands to pblend are in the right order.
+  ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+  ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+  ; CHECK-W-SSE4: psllw $2
+
+  ; Make sure we're masking and pcmp'ing the VSELECT conditon vector.
+  ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+  ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+  ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
+  %1 = shl <16 x i8> %a, %b
+  ret <16 x i8> %1
+}
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
new file mode 100644
index 0000000..39c213f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; Make sure we don't load from the location pointed to by %p
+; twice: it has non-obvious performance implications, and
+; the relevant transformation doesn't know how to update
+; the chains correctly.
+; PR10747
+
+; CHECK: test:
+; CHECK: pextrd $2, %xmm
+define <4 x i32> @test(<4 x i32>* %p) {
+  %v = load <4 x i32>* %p
+  %e = extractelement <4 x i32> %v, i32 2
+  %cmp = icmp eq i32 %e, 3
+  %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
+  ret <4 x i32> %sel
+}
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
new file mode 100644
index 0000000..dbc122a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin  -mcpu=corei7 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.2.0"
+
+; CHECK: @foo8
+; CHECK: psll
+; CHECK: psraw
+; CHECK: pblendvb
+; CHECK: ret
+define void @foo8(float* nocapture %RET) nounwind {
+allocas:
+  %resultvec.i = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <8 x i8> <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
+  %uint2float = uitofp <8 x i8> %resultvec.i to <8 x float>
+  %ptr = bitcast float * %RET to <8 x float> *
+  store <8 x float> %uint2float, <8 x float>* %ptr, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
new file mode 100644
index 0000000..e2b3ebc
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
+; CHECK: prom_bug
+; CHECK: shufb
+; CHECK: movd
+; CHECK: movw
+; CHECK: ret
+define void @prom_bug(<4 x i8> %t, i16* %p) {
+  %r = bitcast <4 x i8> %t to <2 x i16>
+  %o = extractelement <2 x i16> %r, i32 0
+  store i16 %o, i16* %p
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
new file mode 100644
index 0000000..75efcf5
--- /dev/null
+++ b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that the booleans are converted using zext and not via sext.
+; 0x1 means that we only look at the first bit.
+
+;CHECK: 0x1
+;CHECK: ui_to_fp_conv
+;CHECK: ret
+define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
+allocas:
+  %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  %bool2float = uitofp <8 x i1> %bincmp to <8 x float>
+  store <8 x float> %bool2float, <8 x float>* %RET, align 4
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
new file mode 100644
index 0000000..832a8eb
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+; This test case has a landing pad with two predecessors, and a variable that
+; is undef on the first edge while carrying the first function return value on
+; the second edge.
+;
+; Live range splitting tries to isolate the block containing the first function
+; call, and it is important that the last split point is after the function call
+; so the return value can spill.
+;
+; <rdar://problem/10664933>
+
+@Exception = external unnamed_addr constant { i8*, i8* }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp {
+bb:
+  br i1 undef, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  %tmp = select i1 false, i32 0, i32 undef
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  %tmp8 = phi i32 [ %tmp, %bb6 ], [ 0, %bb ]
+  %tmp9 = shl i32 %tmp8, 2
+  %tmp10 = invoke noalias i8* @_Znam(i32 undef) optsize
+          to label %bb11 unwind label %bb20
+
+bb11:                                             ; preds = %bb7
+  %tmp12 = ptrtoint i8* %tmp10 to i32
+  %tmp13 = bitcast i8* %tmp10 to i32*
+  %tmp14 = shl i32 %tmp8, 2
+  %tmp15 = getelementptr i32* %tmp13, i32 undef
+  %tmp16 = getelementptr i32* %tmp13, i32 undef
+  %tmp17 = zext i32 %tmp9 to i64
+  %tmp18 = add i64 %tmp17, -1
+  %tmp19 = icmp ugt i64 %tmp18, 4294967295
+  br i1 %tmp19, label %bb29, label %bb31
+
+bb20:                                             ; preds = %bb43, %bb41, %bb29, %bb7
+  %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ]
+  %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @Exception to i8*)
+  br i1 undef, label %bb23, label %bb69
+
+bb23:                                             ; preds = %bb38, %bb20
+  %tmp24 = phi i32 [ %tmp12, %bb38 ], [ %tmp21, %bb20 ]
+  %tmp25 = icmp eq i32 %tmp24, 0
+  br i1 %tmp25, label %bb28, label %bb26
+
+bb26:                                             ; preds = %bb23
+  %tmp27 = inttoptr i32 %tmp24 to i8*
+  br label %bb28
+
+bb28:                                             ; preds = %bb26, %bb23
+  ret void
+
+bb29:                                             ; preds = %bb11
+  invoke void @OnOverFlow() optsize
+          to label %bb30 unwind label %bb20
+
+bb30:                                             ; preds = %bb29
+  unreachable
+
+bb31:                                             ; preds = %bb11
+  %tmp32 = bitcast i32* %tmp15 to i8*
+  %tmp33 = zext i32 %tmp8 to i64
+  %tmp34 = add i64 %tmp33, -1
+  %tmp35 = icmp ugt i64 %tmp34, 4294967295
+  %tmp36 = icmp sgt i32 %tmp8, 0
+  %tmp37 = add i32 %tmp9, -4
+  br label %bb38
+
+bb38:                                             ; preds = %bb67, %bb31
+  %tmp39 = phi i32 [ %tmp68, %bb67 ], [ undef, %bb31 ]
+  %tmp40 = icmp sgt i32 %tmp39, undef
+  br i1 %tmp40, label %bb41, label %bb23
+
+bb41:                                             ; preds = %bb38
+  invoke void @Pjii(i32* %tmp16, i32 0, i32 %tmp8) optsize
+          to label %bb42 unwind label %bb20
+
+bb42:                                             ; preds = %bb41
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp35, label %bb43, label %bb45
+
+bb43:                                             ; preds = %bb42
+  invoke void @OnOverFlow() optsize
+          to label %bb44 unwind label %bb20
+
+bb44:                                             ; preds = %bb43
+  unreachable
+
+bb45:                                             ; preds = %bb57, %bb42
+  %tmp46 = phi i32 [ %tmp58, %bb57 ], [ 255, %bb42 ]
+  %tmp47 = icmp slt i32 undef, 0
+  br i1 %tmp47, label %bb48, label %bb59
+
+bb48:                                             ; preds = %bb45
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb49, label %bb57
+
+bb49:                                             ; preds = %bb49, %bb48
+  %tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
+  %tmp51 = add i32 %tmp50, undef
+  %tmp52 = add i32 %tmp50, undef
+  %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
+  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp55 = add i32 %tmp50, 1
+  %tmp56 = icmp eq i32 %tmp55, %tmp8
+  br i1 %tmp56, label %bb57, label %bb49
+
+bb57:                                             ; preds = %bb49, %bb48
+  %tmp58 = add i32 %tmp46, -1
+  br label %bb45
+
+bb59:                                             ; preds = %bb45
+  %tmp60 = ashr i32 %tmp46, 31
+  tail call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 %tmp37, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb61, label %bb67
+
+bb61:                                             ; preds = %bb61, %bb59
+  %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
+  %tmp63 = add i32 %tmp62, %tmp14
+  %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  %tmp65 = add i32 %tmp62, 1
+  %tmp66 = icmp eq i32 %tmp65, %tmp8
+  br i1 %tmp66, label %bb67, label %bb61
+
+bb67:                                             ; preds = %bb61, %bb59
+  %tmp68 = add i32 %tmp39, -1
+  br label %bb38
+
+bb69:                                             ; preds = %bb20
+  resume { i8*, i32 } %tmp22
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare noalias i8* @_Znam(i32) optsize
+
+declare void @Pjii(i32*, i32, i32) optsize
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare void @OnOverFlow() noreturn optsize ssp align 2
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 0000000..6b90072
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+  %b = load <18 x i16>* %bp, align 16
+  %x = add <18 x i16> zeroinitializer, %b
+  store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 0000000..fa8e80f
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() {
+entry:
+  %0 = load <8 x i32> addrspace(1)* undef, align 32
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
new file mode 100644
index 0000000..a883d79
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; PR11768
+
+@ptr = external global i8*
+
+define void @baz() nounwind ssp {
+entry:
+  %0 = load i8** @ptr, align 4
+  %cmp = icmp eq i8* %0, null
+  fence seq_cst
+  br i1 %cmp, label %if.then, label %if.else
+
+; Make sure the fence comes before the comparison, since it
+; clobbers EFLAGS.
+
+; CHECK: lock
+; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK-NEXT: cmpl $0
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @foo to void ()*)() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @bar to void ()*)() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+declare void @foo(...)
+
+declare void @bar(...)
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
new file mode 100644
index 0000000..8a3ccc8
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
+
+;CHECK: vcast
+define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
+;CHECK: pshufd
+;CHECK: pshufd
+  %af = bitcast <2 x float> %a to <2 x i32>
+  %bf = bitcast <2 x float> %b to <2 x i32>
+  %x = sub <2 x i32> %af, %bf
+;CHECK: psubq
+  ret <2 x i32> %x
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
new file mode 100644
index 0000000..13723a2
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @dagco_crash() {
+entry:
+  %srcval.i411.i = load <4 x i64>* undef, align 1
+  %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
+  %srcval.i409.i = load <2 x i64>* undef, align 1
+  %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
+  %2 = insertelement <8 x i64> undef, i64 %0, i32 5
+  %3 = insertelement <8 x i64> %2, i64 %1, i32 6
+  %4 = insertelement <8 x i64> %3, i64 undef, i32 7
+  store <8 x i64> %4, <8 x i64> addrspace(1)* undef, align 64
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-02-14-scalar.ll b/test/CodeGen/X86/2012-02-14-scalar.ll
new file mode 100644
index 0000000..1dc076b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-14-scalar.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @autogen_28112_5000() {
+BB:
+  %S17 = icmp sgt <1 x i64> undef, undef
+  %E19 = extractelement <1 x i1> %S17, i32 0
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %S23 = select i1 %E19, i8 undef, i8 undef
+  br label %CF
+}
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
new file mode 100644
index 0000000..557d49d
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; PR11940: Do not optimize away movb %al, %ch
+
+%struct.APInt = type { i64* }
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
+entry:
+; CHECK: bug:
+  %call = tail call i8* @calloc(i32 1, i32 32)
+  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %0 = bitcast i8* %call.i to i64*
+  %rem.i = and i32 %rotateAmt, 63
+  %div.i = lshr i32 %rotateAmt, 6
+  %cmp.i = icmp eq i32 %rem.i, 0
+  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i:                             ; preds = %entry
+  %sub.i = sub i32 4, %div.i
+  %cmp23.i = icmp eq i32 %div.i, 4
+  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
+  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %.pre5.i = load i64** %pVal.i, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
+  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
+  %add.i = add i32 %i.04.i, %div.i
+  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
+  %1 = load i64* %arrayidx.i, align 4
+  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
+  store i64 %1, i64* %arrayidx3.i, align 4
+  %inc.i = add i32 %i.04.i, 1
+  %cmp2.i = icmp ult i32 %inc.i, %sub.i
+  br i1 %cmp2.i, label %for.body.i, label %if.end.i
+
+if.end.i:                                         ; preds = %for.body.i, %entry
+  %cmp81.i = icmp eq i32 %div.i, 3
+  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
+
+for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
+  %sub58.i = sub i32 3, %div.i
+  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %sh_prom.i = zext i32 %rem.i to i64
+  %sub17.i = sub i32 64, %rem.i
+  %sh_prom18.i = zext i32 %sub17.i to i64
+  %.pre.i = load i64** %pVal11.i, align 4
+  br label %for.body9.i
+
+for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
+; CHECK: %for.body9.i
+; CHECK: movb
+; CHECK: shrdl
+  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
+  %add10.i = add i32 %i6.02.i, %div.i
+  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
+  %2 = load i64* %arrayidx12.i, align 4
+  %shr.i = lshr i64 %2, %sh_prom.i
+  %add14.i = add i32 %add10.i, 1
+  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
+  %3 = load i64* %arrayidx16.i, align 4
+  %shl.i = shl i64 %3, %sh_prom18.i
+  %or.i = or i64 %shl.i, %shr.i
+  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
+  store i64 %or.i, i64* %arrayidx19.i, align 4
+  %inc21.i = add i32 %i6.02.i, 1
+  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
+  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
+
+_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
+  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
+  store i64* %0, i64** %4, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
new file mode 100644
index 0000000..a55c77b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; <rdar://problem/10106006>
+
+define void @func() nounwind ssp {
+; CHECK:  psrlw %mm0, %mm1
+entry:
+  call void asm sideeffect "psrlw $0, %mm1", "y,~{dirflag},~{fpsr},~{flags}"(i32 8) nounwind
+  unreachable
+
+bb367:                                            ; preds = %entry                                                                                                                 
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
new file mode 100644
index 0000000..bdce853
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 <%s
+; PR12138
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.0"
+
+%struct.S0 = type { i8, i32 }
+
+@d = external global [2 x [2 x %struct.S0]], align 4
+@c = external global i32, align 4
+@e = external global i32, align 4
+@b = external global i32, align 4
+@a = external global i32, align 4
+
+define void @fn2() nounwind optsize ssp {
+entry:
+  store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
+  %0 = load i32* @c, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool4 = icmp eq i32 %1, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %if.end, %entry
+  %f.1.0 = phi i32 [ undef, %entry ], [ %sub, %if.end ]
+  %g.0 = phi i64 [ 0, %entry ], [ %ins, %if.end ]
+  %tobool = icmp eq i32 %f.1.0, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %2 = lshr i64 %g.0, 32
+  %conv = trunc i64 %2 to i16
+  br i1 %tobool2, label %lor.rhs, label %lor.end
+
+lor.rhs:                                          ; preds = %for.body
+  store i32 1, i32* @e, align 4
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %for.body
+  %xor.i = xor i16 %conv, 1
+  %p1.lobit.i8 = lshr i64 %g.0, 47
+  %p1.lobit.i8.tr = trunc i64 %p1.lobit.i8 to i16
+  %p1.lobit.i = and i16 %p1.lobit.i8.tr, 1
+  %and.i = and i16 %p1.lobit.i, %xor.i
+  %3 = xor i16 %and.i, 1
+  %sub.conv.i = sub i16 %conv, %3
+  %conv3 = sext i16 %sub.conv.i to i32
+  store i32 %conv3, i32* @b, align 4
+  br i1 %tobool4, label %if.end, label %for.end
+
+if.end:                                           ; preds = %lor.end
+  %mask = and i64 %g.0, -256
+  %ins = or i64 %mask, 1
+  %sub = add nsw i32 %f.1.0, -1
+  br label %for.cond
+
+for.end:                                          ; preds = %lor.end, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
new file mode 100644
index 0000000..fec17e9
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
@@ -0,0 +1,10 @@
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: build_vector_again
+define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
+entry:
+  %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: shufb
+  ret <4 x i8> %out
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
new file mode 100644
index 0000000..d24647e
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/11070338>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK:      _.memset_pattern:
+; CHECK-NEXT: .quad   4575657222473777152
+; CHECK-NEXT: .quad   4575657222473777152
+
+@.memset_pattern = internal unnamed_addr constant i128 or (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 shl (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 64)), align 16
+
+define void @foo(i8* %a, i64 %b) {
+  call void @memset_pattern16(i8* %a, i8* bitcast (i128* @.memset_pattern to i8*), i64 %b)
+  ret void
+}
+
+declare void @memset_pattern16(i8*, i8*, i64)
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 0000000..101ecca
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
+; RUN:   not grep {Number of machine instructions hoisted out of loops post regalloc}
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
+entry:
+  %call = tail call i32 @foo()
+  %tmp = ashr i32 %call, 31
+  %0 = and i32 %tmp, 1396
+  %index9 = add i32 %0, 2397
+  indirectbr i8* undef, [label %return, label %if.end]
+
+if.end:                                           ; preds = %entry
+  %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
+  %tmp6 = load i16* %size5, align 2
+  %tobool1 = icmp eq i16 %tmp6, 0
+  %1 = select i1 %tobool1, i32 1396, i32 -1910
+  %index10 = add i32 %index9, %1
+  indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph:                                 ; preds = %if.end
+  %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+  %tmp9 = load %struct.ref_s** %refs, align 8
+  %tmp4 = zext i16 %tmp6 to i64
+  %index13 = add i32 %index10, 1658
+  %2 = sext i32 %index13 to i64
+  %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+  %blockaddress14 = load i64* %3, align 8
+  %4 = inttoptr i64 %blockaddress14 to i8*
+  indirectbr i8* %4, [label %while.body]
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+  %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+  %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+  store i16 32, i16* %type_attrs, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+  %tmp7 = select i1 %exitcond5, i32 1648, i32 0
+  %index15 = add i32 %index7, %tmp7
+  %tmp8 = select i1 %exitcond5, i64 13, i64 0
+  %5 = sext i32 %index15 to i64
+  %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+  %blockaddress16 = load i64* %6, align 8
+  %7 = inttoptr i64 %blockaddress16 to i8*
+  indirectbr i8* %7, [label %return, label %while.body]
+
+return:                                           ; preds = %while.body, %if.end, %entry
+  %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
new file mode 100644
index 0000000..2d90165
--- /dev/null
+++ b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O1 -verify-coalescing < %s
+; PR12495
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @func(i8* nocapture) nounwind uwtable ssp align 2 {
+  br i1 undef, label %4, label %2
+
+; <label>:2                                       ; preds = %1                  
+  %3 = tail call double @foo() nounwind
+  br label %4
+
+; <label>:4                                       ; preds = %2, %1              
+  %5 = phi double [ %3, %2 ], [ 0.000000e+00, %1 ]
+  %6 = fsub double %5, undef
+  %7 = fcmp olt double %6, 0.000000e+00
+  %8 = select i1 %7, double 0.000000e+00, double %6
+  %9 = fcmp olt double undef, 0.000000e+00
+  %10 = fcmp olt double %8, undef
+  %11 = or i1 %9, %10
+  br i1 %11, label %12, label %14
+
+; <label>:12                                      ; preds = %4                  
+  %13 = tail call double @fmod(double %8, double 0.000000e+00) nounwind
+  unreachable
+
+; <label>:14                                      ; preds = %4                  
+  ret void
+}
+
+declare double @foo()
+
+declare double @fmod(double, double)
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
new file mode 100644
index 0000000..ff6be36
--- /dev/null
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+;CHECK: bad_cast
+define void @bad_cast() {
+entry:
+  %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+  %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
+  store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
+;CHECK: ret
+  ret void
+}
+
+
+;CHECK: bad_insert
+define void @bad_insert(i32 %t) {
+entry:
+;CHECK: vpinsrd
+  %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
+  store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/GC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
new file mode 100644
index 0000000..a8ad0f1
--- /dev/null
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
new file mode 100644
index 0000000..100817a
--- /dev/null
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+  %A = load <4 x i8>* %pA
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret void
+}
+
+; CHECK: multi_use_swizzle
+; CHECK: mov
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: xor
+; CHECK-NEXT: ret
+define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
+  %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
+  %R = xor <4 x i32> %S1, %S2
+  ret <4 x i32> %R
+}
+
+; CHECK: pull_bitcast2
+; CHECK: xorl
+; CHECK: ret
+define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
+  %A = load <4 x i8>* %pA
+  store <4 x i8> %A, <4 x i8>* %pC
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret <4 x i8> %C
+}
+
+
+
+; CHECK: reverse_1
+; CHECK-NOT: shuf
+; CHECK: ret
+define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
+
+
+; CHECK: no_reverse_shuff
+; CHECK: shuf
+; CHECK: ret
+define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5068d29..658ccaa 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 7bf527a..8e871f4 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
 
 ; Some of these tests depend on -join-physregs to commute instructions.
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6..aaedf18 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
new file mode 100644
index 0000000..5942788
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
+
+declare void @use_arr(i8*)
+declare void @many_params(i32, i32, i32, i32, i32, i32)
+
+define void @test1() nounwind {
+; atom: test1:
+; atom: leal -1052(%esp), %esp
+; atom-NOT: sub
+; atom: call
+; atom: leal 1052(%esp), %esp
+
+; CHECK: test1:
+; CHECK: subl
+; CHECK: call
+; CHECK-NOT: lea
+  %arr = alloca [1024 x i8], align 16
+  %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0
+  call void @use_arr(i8* %arr_ptr)
+  ret void
+}
+
+define void @test2() nounwind {
+; atom: test2:
+; atom: leal -28(%esp), %esp
+; atom: call
+; atom: leal 28(%esp), %esp
+
+; CHECK: test2:
+; CHECK-NOT: lea
+  call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+  ret void
+}
+
+define void @test3() nounwind {
+; atom: test3:
+; atom: leal -8(%esp), %esp
+; atom: leal 8(%esp), %esp
+
+; CHECK: test3:
+; CHECK-NOT: lea
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 0, i32* %x, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
new file mode 100644
index 0000000..2301dfc
--- /dev/null
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -0,0 +1,28 @@
+; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define void @func() nounwind uwtable {
+; atom: imull
+; atom-NOT: movl
+; atom: imull
+; CHECK: imull
+; CHECK: movl
+; CHECK: imull
+entry:
+  %0 = load i32* @b, align 4
+  %1 = load i32* @c, align 4
+  %mul = mul nsw i32 %0, %1
+  store i32 %mul, i32* @a, align 4
+  %2 = load i32* @e, align 4
+  %3 = load i32* @f, align 4
+  %mul1 = mul nsw i32 %2, %3
+  store i32 %mul1, i32* @d, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index 59988ca..4aa3370 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -259,3 +259,14 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
   ret <4 x i64> %x
 }
 
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @int_sqrt_ss() {
+; CHECK: int_sqrt_ss
+; CHECK: vsqrtss
+ %x0 = load float addrspace(1)* undef, align 8
+ %x1 = insertelement <4 x float> undef, float %x0, i32 0
+ %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
+ ret <4 x float> %x2
+}
+
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 0a46b08..8ad0fa8 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -6,7 +6,7 @@
 
 define void @zero128() nounwind ssp {
 entry:
-  ; CHECK: vpxor
+  ; CHECK: vxorps
   ; CHECK: vmovaps
   store <4 x float> zeroinitializer, <4 x float>* @z, align 16
   ret void
@@ -105,3 +105,19 @@ allocas:
   ret <8 x i32> %updatedret.i30.i
 }
 
+;;;; Don't crash on fneg
+; rdar://10566486
+; CHECK: fneg
+; CHECK: vxorps
+define <16 x float> @fneg(<16 x float> addrspace(1)* nocapture %out) nounwind {
+  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  ret <16 x float> %1
+}
+
+;;; Don't crash on build vector
+; CHECK: @build_vec_16x16
+; CHECK: vmovd
+define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
+  %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
+  ret <16 x i16> %res
+}
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415..32d450c 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
   ret <4 x double> %shuffle.i
 }
 
-; CHECK: vpxor
+; CHECK: vxorps
 ; CHECK-NEXT: vinsertf128 $0
 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6c0bd58..d0a7fe0 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,7 +18,7 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
   ret <4 x double> %b
 }
 
-; CHECK: vcvtpd2dqy %ymm
+; CHECK: vcvttpd2dqy %ymm
 define <4 x i32> @fptosi01(<4 x double> %a) {
   %b = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
new file mode 100755
index 0000000..a3aadde
--- /dev/null
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;; Check that FP_TO_SINT and FP_TO_UINT generate convert with truncate
+
+; CHECK: test1:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+; CHECK: test2:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+
+define <4 x i8> @test1(<4 x double> %d) {
+  %c = fptoui <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
+define <4 x i8> @test2(<4 x double> %d) {
+  %c = fptosi <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 5201688..b334932 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -245,34 +245,6 @@ define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
-
-
-define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
-
-
-define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
-  ; CHECK: pushl
-  ; CHECK: movl
-  ; CHECK: vmaskmovdqu
-  ; CHECK: popl
-  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
-  ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
 
 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vmaxpd
@@ -314,25 +286,10 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
 
 
-define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
-  ret void
-}
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
-
-
-define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
-  ret void
-}
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
   ; CHECK: vmulsd
   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -412,54 +369,6 @@ define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -749,6 +658,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
   ; CHECK: movl
   ; CHECK: vmovq
   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +668,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
 
 
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +678,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
 
 
 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
   ; CHECK: vsubsd
   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -955,21 +869,13 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovntdqa
-  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
-
 
-define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vmpsadbw
-  %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
 }
-declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -996,14 +902,6 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   ; CHECK: vphminposuw
   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1180,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sbbl
-  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: seta
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sete
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 
 
 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
@@ -1317,14 +1215,6 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
   ; CHECK: movl
@@ -1512,14 +1402,6 @@ define void @test_x86_sse_ldmxcsr(i8* %a0) {
 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
 
 
-define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
-
 
 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vmaxps
@@ -1561,14 +1443,6 @@ define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
 
 
-define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntps
-  call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
-  ret void
-}
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
-
 
 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vmulss
@@ -1743,12 +1617,12 @@ define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
 
 
-define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
+define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vphaddsw
-  %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
+  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
 }
-declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
 
 
 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1783,12 +1657,12 @@ define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpmaddubsw
-  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
 
 
 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1892,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
+
+define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vcmpeqps
+  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpltps
+  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpleps
+  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunordps
+  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneqps
+  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnltps
+  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnleps
+  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpordps
+  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_uqps
+  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngeps
+  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngtps
+  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalseps
+  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_oqps
+  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgeps
+  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgtps
+  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrueps
+  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_osps
+  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmplt_oqps
+  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmple_oqps
+  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunord_sps
+  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_usps
+  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnlt_uqps
+  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnle_uqps
+  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpord_sps
+  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_usps
+  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnge_uqps
+  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngt_uqps
+  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalse_osps
+  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_osps
+  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpge_oqps
+  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgt_oqps
+  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrue_usps
+  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
@@ -2007,30 +1949,6 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
 
 
-define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
-  ; CHECK: vmovdqu
-  %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
-
-
-define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
-  ; CHECK: vmovupd
-  %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
-  ; CHECK: vmovups
-  %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
-
-
 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
   ; CHECK: vmaskmovpd
   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -2143,29 +2061,10 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
 
 
-define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
-  ; CHECK: vmovntdq
-  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
 
 
-define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
-  ; CHECK: vmovntpd
-  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
 
 
-define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
-  ; CHECK: vmovntps
-  call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
-
 
 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
   ; CHECK: vptest
@@ -2245,8 +2144,11 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
 
 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
-  ; CHECK: vmovdqu
-  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
+  ; CHECK: vmovups
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
@@ -2254,7 +2156,9 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
 
 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   ; CHECK: vmovupd
-  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+  ; add operation forces the execution domain.
+  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
@@ -2292,20 +2196,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
 
 
-define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
 
 
-define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
 
 
 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
@@ -2433,6 +2337,12 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
+define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
+  ; CHECK: vpermilps
+  %a2 = load <4 x i32>* %a1
+  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
 
 
@@ -2575,4 +2485,73 @@ define void @test_x86_avx_vzeroupper() {
 }
 declare void @llvm.x86.avx.vzeroupper() nounwind
 
+; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
 
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse.sfence()
+  ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
+
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.lfence()
+  ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.mfence()
+  ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+  tail call void @llvm.x86.sse2.clflush(i8* %p)
+  ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind
+
+; CHECK: crc32b
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+; CHECK: crc32w
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+; CHECK: crc32l
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 07a63ef..c9fc66a 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
 
 ;;
 ;; The two tests below check that we must fold load + scalar_to_vector
-;; + ins_subvec+ zext into only a single vmovss or vmovsd
+;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory
 
-; CHECK: vmovss (%
+; CHECK: mov00
 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
   %val = load float* %ptr
+; CHECK: vinsertps
+; CHECK: vinsertf128
   %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
   ret <8 x float> %i0
+; CHECK: ret
 }
 
-; CHECK: vmovsd (%
+; CHECK: mov01
 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
   %val = load double* %ptr
+; CHECK: vmovlpd
+; CHECK: vinsertf128
   %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
   ret <4 x double> %i0
+; CHECK: ret
 }
 
 ; CHECK: vmovaps  %ymm
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 518c09c..115cefb 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -7,7 +7,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandpd LCP{{.*}}(%rip)
@@ -16,7 +18,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vandps
@@ -45,7 +49,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vxorpd LCP{{.*}}(%rip)
@@ -54,7 +60,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vxorps
@@ -83,7 +91,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vorpd LCP{{.*}}(%rip)
@@ -92,7 +102,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vorps
@@ -122,7 +134,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnpd (%
@@ -134,7 +148,9 @@ entry:
   %1 = bitcast <4 x double> %tmp2 to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnps
@@ -165,7 +181,9 @@ entry:
 ; CHECK: vpandn  %xmm
 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
   %x = and <2 x i64> %a, %y
   ret <2 x i64> %x
 }
@@ -173,7 +191,9 @@ entry:
 ; CHECK: vpand %xmm
 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
   ret <2 x i64> %x
 }
 
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index f36ba7b..7c58820 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -33,7 +33,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
 }
 
 ; UNSAFE: vmaxpd:
-; UNSAFE: vmaxpd %ymm
+; UNSAFE: vmaxpd {{.+}}, %ymm
 define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   %max_is_x = fcmp oge <4 x double> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y
@@ -41,7 +41,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vminpd:
-; UNSAFE: vminpd %ymm
+; UNSAFE: vminpd {{.+}}, %ymm
 define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   %min_is_x = fcmp ole <4 x double> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
@@ -49,7 +49,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vmaxps:
-; UNSAFE: vmaxps %ymm
+; UNSAFE: vmaxps {{.+}}, %ymm
 define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   %max_is_x = fcmp oge <8 x float> %x, %y
   %max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y
@@ -57,7 +57,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
 }
 
 ; UNSAFE: vminps:
-; UNSAFE: vminps %ymm
+; UNSAFE: vminps {{.+}}, %ymm
 define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
   %min_is_x = fcmp ole <8 x float> %x, %y
   %min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
new file mode 100755
index 0000000..3713a8c
--- /dev/null
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_8i16_to_8i32
+;CHECK: vpmovsxwd
+
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_4i32_to_4i64
+;CHECK: vpmovsxdq
+
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 3ea39a2..681747b 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -62,6 +62,45 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
   ret <16 x i16> %s
 }
 
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
+  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: pcmpgtb
+define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
+  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %s
+}
+
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: vpsrlw
+; CHECK: pand
+define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
+  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
+; CHECK: vpsllw
+; CHECK: pand
+; CHECK: vpsllw
+; CHECK: pand
+define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
+  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
 ;;; Support variable shifts
 ; CHECK: _vshift08
 ; CHECK: vextractf128 $1
@@ -73,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
   ret <8 x i32> %bitop
 }
 
+;;; Uses shifts for sign extension
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+  %b = trunc <16 x i16> %a to <16 x i8>
+  %c = sext <16 x i8> %b to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
new file mode 100755
index 0000000..5268ec3a
--- /dev/null
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i64> @test1(<4 x i64> %a) nounwind {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i64>%b
+ ; CHECK: test1:
+ ; CHECK: vinsertf128
+ }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 0db334d..16c447b 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,5 +6,199 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   ret <4 x float> %b
 ; CHECK: test1:
 ; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
+}
+
+; rdar://10538417
+define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vinsertf128
+  %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
+  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
+  ret <3 x i64> %2
+; CHECK: ret
+}
+
+define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
+  ret <4 x i64> %c
+; CHECK: test3:
+; CHECK: vperm2f128
+; CHECK: ret
+}
+
+define <8 x float> @test4(float %a) nounwind {
+  %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
+  ret <8 x float> %b
+; CHECK: test4:
+; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+; CHECK: test5
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast double* %d to <2 x double>*
+  %1 = load <2 x double>* %0, align 16
+; CHECK: test6
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
+}
+
+define <16 x i16> @test7(<4 x i16> %a) nounwind {
+; CHECK: test7
+  %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: ret
+  ret <16 x i16> %b
+}
+
+; CHECK: test8
+define void @test8() {
+entry:
+  %0 = load <16 x i64> addrspace(1)* null, align 128
+  %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
+  %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
+  store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
+; CHECK: ret
+  ret void
+}
+
+; Extract a value from a shufflevector..
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+  ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = extractelement <8 x i32> %b, i32 2
+  ret i32 %r
+}
+
+define <4 x float> @test11(<4 x float> %a) nounwind  {
+; check: test11
+; check: vpermilps $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind  {
+; CHECK: test12
+; CHECK: vpermilps $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x i32> @test13(<4 x i32> %a) nounwind  {
+; check: test13
+; check: vpshufd $27
+  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+  %tmp0 = load <4 x i32>* %a
+  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
+  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ret <4 x i32>%x1
+}
+
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+; PR12413
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
+i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
+22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
+42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
+62>
+ ret <32 x i8> %0
+}
+
+; CHECK: blend1
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
 }
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90..94bcddd 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
@@ -47,7 +47,7 @@ entry:
 ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
 ; To:
 ;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
 ; CHECK-NEXT: vinsertf128  $1
 ; CHECK-NEXT: vpermilps $-1
 define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 0000000..d007736
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_64_32
+; CHECK: pshufd
+  %B = trunc <4 x i64> %A to <4 x i32>
+  ret <4 x i32>%B
+}
+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_32_16
+; CHECK: pshufb
+  %B = trunc <8 x i32> %A to <8 x i16>
+  ret <8 x i16>%B
+}
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index d420101..20f5345 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -67,6 +67,15 @@ entry:
   ret <8 x i32> %shuffle.i
 }
 
+; CHECK: vunpckhps (%
+define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <8 x i32>* %src1
+  %b = load <8 x i32>* %src2
+  %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
 ; CHECK: vunpckhpd
 define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
 entry:
@@ -74,6 +83,15 @@ entry:
   ret <4 x i64> %shuffle.i
 }
 
+; CHECK: vunpckhpd (%
+define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <4 x i64>* %src1
+  %b = load <4 x i64>* %src2
+  %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
 ; CHECK: vunpcklps
 define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
 entry:
@@ -81,9 +99,63 @@ entry:
   ret <8 x i32> %shuffle.i
 }
 
+; CHECK: vunpcklps (%
+define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <8 x i32>* %src1
+  %b = load <8 x i32>* %src2
+  %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %shuffle.i
+}
+
 ; CHECK: vunpcklpd
 define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
 entry:
   %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i64> %shuffle.i
 }
+
+; CHECK: vunpcklpd (%
+define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <4 x i64>* %src1
+  %b = load <4 x i64>* %src2
+  %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+; CHECK: vpunpckhwd
+; CHECK: vinsertf128
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+; CHECK: vpunpcklwd
+; CHECK: vinsertf128
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+; CHECK: vpunpckhbw
+; CHECK: vinsertf128
+define <32 x i8> @unpackhbw_undef(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+define <32 x i8> @unpacklbw_undef(<32 x i8> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
new file mode 100644
index 0000000..b0932bd
--- /dev/null
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; <rdar://problem/10463281>
+; Check that the <8 x float> is passed on the stack.
+
+@x = common global <8 x float> zeroinitializer, align 32
+declare i32 @f(i32, ...)
+
+; CHECK: test1:
+; CHECK: vmovaps	%ymm0, (%rsp)
+define void @test1() nounwind uwtable ssp {
+entry:
+  %0 = load <8 x float>* @x, align 32
+  %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 89b4188..148ae73 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,7 +1,4 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; XFAIL: *
-
-; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll
 
 ; CHECK: vbroadcastsd (%
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
@@ -50,7 +47,7 @@ entry:
 ;;;; 128-bit versions
 
 ; CHECK: vbroadcastss (%
-define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp {
+define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
 entry:
   %q = load float* %ptr, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -60,6 +57,19 @@ entry:
   ret <4 x float> %vecinit6.i
 }
 
+
+; CHECK: _e2
+; CHECK-NOT: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+    %vecinit.i = insertelement <4 x float> undef, float      0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+
 ; CHECK: vbroadcastss (%
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -74,7 +84,7 @@ entry:
 ; Unsupported vbroadcasts
 
 ; CHECK: _G
-; CHECK-NOT: vbroadcastsd (%
+; CHECK-NOT: broadcast (%
 ; CHECK: ret
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -85,10 +95,20 @@ entry:
 }
 
 ; CHECK: _H
-; CHECK-NOT: vbroadcastss
+; CHECK-NOT: broadcast
 ; CHECK: ret
 define <4 x i32> @H(<4 x i32> %a) {
   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   ret <4 x i32> %x
 }
 
+; CHECK: _I
+; CHECK-NOT: broadcast (%
+; CHECK: ret
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index dccf901..fe0f6ca 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
+; CHECK: @A
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -8,6 +9,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: @B
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -16,3 +18,89 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: @t0
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+; CHECK: @t1
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to i8*
+  tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @t2
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to <2 x double>*
+  store <2 x double> %0, <2 x double>* %1, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+; CHECK: @t3
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to i8*
+  tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+; CHECK: @t4
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <4 x i32> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %addr, align 16
+  ret void
+}
+
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; CHECK: @t5
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovdqu %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t5(<2 x i64>* %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <2 x i64>* %addr to i8*
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index cda1331..9a954fe 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -56,3 +56,76 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
   %2 = add <8 x i32> %1, %v1
   ret <8 x i32> %2
 }
+
+; CHECK: insert_pd
+define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vinsertf128
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+
+; CHECK: insert_undef_pd
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK: insert_ps
+define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+
+; CHECK: insert_undef_ps
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK: insert_si
+define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+
+; CHECK: insert_undef_si
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+; rdar://10643481
+; CHECK: vinsertf128_combine
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
+
+; rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index 3550a90..caa21e5 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
+; CHECK: _A
 ; CHECK: vperm2f128 $1
 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -7,6 +8,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _B
 ; CHECK: vperm2f128 $48
 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -14,6 +16,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _C
 ; CHECK: vperm2f128 $0
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -21,6 +24,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _D
 ; CHECK: vperm2f128 $17
 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -28,6 +32,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _E
 ; CHECK: vperm2f128 $17
 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
 entry:
@@ -35,7 +40,8 @@ entry:
   ret <32 x i8> %shuffle
 }
 
-; CHECK: vperm2f128 $33
+; CHECK: _E2
+; CHECK: vperm2f128 $3
 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -44,6 +50,7 @@ entry:
 
 ;;;; Cases with undef indicies mixed in the mask
 
+; CHECK: _F
 ; CHECK: vperm2f128 $33
 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 49b2f54..cb904b9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -28,6 +28,14 @@ entry:
   ret <4 x i64> %shuffle
 }
 
+; CHECK: vpermilpd
+define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
+  ret <4 x i64> %shuffle
+}
+
 ; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
 ; target specific mask was correctly generated.
 ; CHECK: vpermilps $-100
@@ -37,7 +45,8 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK-NOT: vpermilps
+; CHECK: palignr
+; CHECK: palignr
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index f06548d..45883b7 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,6 +7,31 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x float>* %a
+  %b2 = load <8 x float>* %b
+  %shuffle = shufflevector <8 x float> %a2, <8 x float> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %ymm
+define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x i32>* %a
+  %b2 = load <8 x i32>* %b
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
 ; CHECK: vshufpd  $10, %ymm
 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
@@ -14,6 +39,31 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x double>* %a
+  %b2 = load <4 x double>* %b
+  %shuffle = shufflevector <4 x double> %a2, <4 x double> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
+
+; CHECK: vshufpd  $10, %ymm
+define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %b2 = load <4 x i64>* %b
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
 ; CHECK: vshufps  $-53, %ymm
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -27,3 +77,81 @@ entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
   ret <4 x double> %shuffle
 }
+
+; CHECK: vshufps $-55, %ymm
+define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vshufpd  $8, %ymm
+define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x float>* %a
+  %b2 = load <4 x float>* %b
+  %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i32>* %a
+  %b2 = load <4 x i32>* %b
+  %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x double>* %a
+  %b2 = load <2 x double>* %b
+  %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x i64>* %a
+  %b2 = load <2 x i64>* %b
+  %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
index eaf236c..bf4ab5b 100644
--- a/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,26 +1,83 @@
 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
-define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <4 x float> %a, %a
-  ret <4 x float> %add.i
-}
+declare <4 x float> @do_sse(<4 x float>)
+declare <8 x float> @do_avx(<8 x float>)
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+@x = common global <4 x float> zeroinitializer, align 16
+@g = common global <8 x float> zeroinitializer, align 32
+
+;; Basic checking - don't emit any vzeroupper instruction
 
 ; CHECK: _test00
 define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
 entry:
+  ; CHECK-NOT: vzeroupper
   %add.i = fadd <4 x float> %a, %b
+  %call3 = call <4 x float> @do_sse(<4 x float> %add.i) nounwind
+  ; CHECK: ret
+  ret <4 x float> %call3
+}
+
+;; Check parameter 256-bit parameter passing
+
+; CHECK: _test01
+define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
+entry:
+  %tmp = load <4 x float>* @x, align 16
   ; CHECK: vzeroupper
   ; CHECK-NEXT: callq _do_sse
-  %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
-  %sub.i = fsub <4 x float> %call3, %add.i
+  %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* @x, align 16
   ; CHECK-NOT: vzeroupper
-  ; CHECK: callq _do_sse_local
-  %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
+  ; CHECK: callq _do_sse
+  %call2 = tail call <4 x float> @do_sse(<4 x float> %call) nounwind
+  store <4 x float> %call2, <4 x float>* @x, align 16
+  ; CHECK: ret
+  ret <8 x float> %c
+}
+
+;; Test the pass convergence and also that vzeroupper is only issued when necessary,
+;; for this function it should be only once
+
+; CHECK: _test02
+define <4 x float> @test02(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+  %add.i = fadd <4 x float> %a, %b
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  ; CHECK: LBB
+  ; CHECK-NOT: vzeroupper
+  %i.018 = phi i32 [ 0, %entry ], [ %1, %for.body ]
+  %c.017 = phi <4 x float> [ %add.i, %entry ], [ %call14, %for.body ]
+  ; CHECK: callq _do_sse
+  %call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
+  ; CHECK-NEXT: callq _do_sse
+  %call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
+  %tmp11 = load <8 x float>* @g, align 32
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
   ; CHECK: vzeroupper
-  ; CHECK-NEXT: jmp _do_sse
-  %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
-  ret <4 x float> %call10
+  ; CHECK-NEXT: callq _do_sse
+  %call14 = tail call <4 x float> @do_sse(<4 x float> %0) nounwind
+  %1 = add nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %1, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret <4 x float> %call14
 }
 
-declare <4 x float> @do_sse(<4 x float>)
+;; Check that we also perform vzeroupper when we return from a function.
+
+; CHECK: _test03
+define <4 x float> @test03(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+  %shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ; CHECK-NOT: vzeroupper
+  ; CHECK: call
+  %call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
+  %shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK: vzeroupper
+  ; CHECK: ret
+  ret <4 x float> %shuf2
+}
diff --git a/test/CodeGen/X86/avx-win64-args.ll b/test/CodeGen/X86/avx-win64-args.ll
new file mode 100755
index 0000000..85b2634
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64-args.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+declare <8 x float> @foo(<8 x float>, i32)
+
+define <8 x float> @test1(<8 x float> %x, <8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+; CHECK: test1
+; CHECK: leaq {{.*}}, %rcx
+; CHECK: movl {{.*}}, %edx
+; CHECK: call
+; CHECK: ret
+  %x1 = fadd  <8 x float>  %x, %y
+  %call = call  <8 x float> @foo(<8 x float> %x1, i32 1) nounwind
+  %y1 = fsub  <8 x float>  %call, %y
+  ret <8 x float> %y1
+}
+
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
new file mode 100644
index 0000000..dc6bd59
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11862
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+; This function has live ymm registers across a win64 call.
+; The ymm6-15 registers are still call-clobbered even if xmm6-15 are callee-saved.
+; Verify that callee-saved registers are not being used.
+
+; CHECK: f___vyf
+; CHECK: pushq %rbp
+; CHECK: vmovmsk
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: call
+; Two reloads. It's OK if these get folded.
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: blend
+define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone {
+allocas:
+  %bincmp = fcmp oeq <8 x float> %x, zeroinitializer
+  %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32>
+  %"~test" = xor <8 x i32> %val_to_boolvec32, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask
+  %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float>
+  %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone
+  %any_mm_cmp27 = icmp eq i32 %v.i47, 0
+  br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false
+
+safe_if_run_false:                                ; preds = %allocas
+  %binop = fadd <8 x float> %x, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25")
+  %binop33 = fadd <8 x float> %calltmp, %x
+  %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float>
+  %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind
+  br label %safe_if_after_false
+
+safe_if_after_false:                              ; preds = %safe_if_run_false, %allocas
+  %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ]
+  ret <8 x float> %0
+}
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
new file mode 100755
index 0000000..b630e9d
--- /dev/null
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_8i16_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: ret
+
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_4i32_to_4i64
+;CHECK: vpunpckhdq
+;CHECK: ret
+
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
new file mode 100644
index 0000000..09f9538
--- /dev/null
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpaddq %ymm
+define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = add <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vpaddd %ymm
+define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = add <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpaddw %ymm
+define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = add <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpaddb %ymm
+define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = add <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vpsubq %ymm
+define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = sub <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vpsubd %ymm
+define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = sub <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpsubw %ymm
+define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = sub <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpsubb %ymm
+define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = sub <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vpmulld %ymm
+define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = mul <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpmullw %ymm
+define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = mul <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpmuludq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = mul <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll
new file mode 100644
index 0000000..df30d9e
--- /dev/null
+++ b/test/CodeGen/X86/avx2-cmp.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpcmpgtd  %ymm
+define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp slt <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+; CHECK: vpcmpgtq  %ymm
+define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %bincmp = icmp slt <4 x i64> %i, %j
+  %x = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %x
+}
+
+; CHECK: vpcmpgtw  %ymm
+define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp slt <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+; CHECK: vpcmpgtb  %ymm
+define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp slt <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
+; CHECK: vpcmpeqd  %ymm
+define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp eq <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+; CHECK: vpcmpeqq  %ymm
+define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %bincmp = icmp eq <4 x i64> %i, %j
+  %x = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %x
+}
+
+; CHECK: vpcmpeqw  %ymm
+define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp eq <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+; CHECK: vpcmpeqb  %ymm
+define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp eq <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
new file mode 100644
index 0000000..1fb41c0
--- /dev/null
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -0,0 +1,994 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpackssdw
+  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpacksswb
+  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpackuswb
+  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpaddsb
+  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpaddsw
+  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpaddusb
+  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpaddusw
+  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpavgb
+  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpavgw
+  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaddwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaxsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaxub
+  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpminsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpminub
+  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
+  ; CHECK: vpmovmskb
+  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhw
+  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhuw
+  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmuludq
+  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsadbw
+  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpslld
+  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
+  ; CHECK: vpslldq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpslldq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsllq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsllw
+  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
+  ; CHECK: vpslld
+  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
+  ; CHECK: vpsllq
+  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
+  ; CHECK: vpsllw
+  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrad
+  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsraw
+  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
+  ; CHECK: vpsrad
+  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
+  ; CHECK: vpsraw
+  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrld
+  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
+  ; CHECK: vpsrldq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpsrldq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsrlq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsrlw
+  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
+  ; CHECK: vpsrld
+  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
+  ; CHECK: vpsrlq
+  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
+  ; CHECK: vpsrlw
+  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsubsb
+  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsubsw
+  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsubusb
+  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsubusw
+  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
+  ; CHECK: vpabsb
+  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
+  ; CHECK: vpabsd
+  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
+  ; CHECK: vpabsw
+  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vphaddd
+  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphaddsw
+  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphaddw
+  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vphsubd
+  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphsubsw
+  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphsubw
+  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaddubsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhrsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpshufb
+  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsignb
+  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsignd
+  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsignw
+  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
+  ; CHECK: movl
+  ; CHECK: vmovntdqa
+  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vmpsadbw
+  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpackusdw
+  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+  ; CHECK: vpblendvb
+  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpblendw
+  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaxsb
+  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaxsd
+  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaxud
+  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaxuw
+  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpminsb
+  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpminsd
+  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpminud
+  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpminuw
+  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbw
+  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
+  ; CHECK: vpmovsxdq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
+  ; CHECK: vpmovsxwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
+  ; CHECK: vpmovsxwq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbw
+  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
+  ; CHECK: vpmovzxdq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
+  ; CHECK: vpmovzxwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
+  ; CHECK: vpmovzxwq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmuldq
+  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<2 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
+  ; CHECK: vbroadcasti128
+  %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
+  ; CHECK: vbroadcastsd
+  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
+  ; CHECK: vpbroadcastb
+  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
+  ; CHECK: vpbroadcastb
+  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
+
+
+define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
+  ; CHECK: vpbroadcastw
+  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
+  ; CHECK: vpbroadcastw
+  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpermd
+  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vpermps
+  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) {
+  ; CHECK: vpermq
+  %res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly
+
+
+define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) {
+  ; CHECK: vpermpd
+  %res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vperm2i128
+  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+  ; CHECK: vextracti128
+  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vinserti128
+  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
+
+
+define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
+
+
+define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+  ; CHECK: vmovdqu
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+  ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
new file mode 100644
index 0000000..13ebaa6
--- /dev/null
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpandn
+; CHECK: vpandn  %ymm
+; CHECK: ret
+define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %x = and <4 x i64> %a, %y
+  ret <4 x i64> %x
+}
+
+; CHECK: vpand
+; CHECK: vpand %ymm
+; CHECK: ret
+define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpor
+; CHECK: vpor %ymm
+; CHECK: ret
+define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpxor
+; CHECK: vpxor %ymm
+; CHECK: ret
+define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpblendvb
+; CHECK: vpblendvb %ymm
+; CHECK: ret
+define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
+  %min_is_x = icmp ult <32 x i8> %x, %y
+  %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
+  ret <32 x i8> %min
+}
+
+define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <8 x i32> zeroinitializer, %a
+  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <8 x i32> %a, %0
+  %2 = and <8 x i32> %b.lobit, %sub
+  %cond = or <8 x i32> %1, %2
+  ret <8 x i32> %cond
+}
+
+define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <8 x i32> zeroinitializer, %a
+  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <8 x i32> %c, %0
+  %2 = and <8 x i32> %a, %b.lobit
+  %cond = or <8 x i32> %1, %2
+  ret <8 x i32> %cond
+}
+
+define <8 x i32> @allOnes() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+        ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <16 x i16> @allOnes2() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+        ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+}
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
new file mode 100644
index 0000000..0768aae
--- /dev/null
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
+
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
+; CHECK: vmovntps
+  %cast = bitcast i8* %B to <8 x float>*
+  %A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
+; CHECK: vmovntdq
+  %cast1 = bitcast i8* %B to <4 x i64>*
+  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: vmovntpd
+  %cast2 = bitcast i8* %B to <4 x double>*
+  %C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
+  store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 0000000..53b9da3
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+;
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
+  ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: palignr $5
+  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
+  ret <32 x i8> %C
+}
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
new file mode 100644
index 0000000..4eac71d
--- /dev/null
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s
+
+; CHECK: phaddw1:
+; CHECK: vphaddw
+define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+  %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %r = add <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phaddw2:
+; CHECK: vphaddw
+define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 0, i32 2, i32 4, i32 6, i32 24, i32 26, i32 28, i32 30, i32 8, i32 10, i32 12, i32 14>
+  %r = add <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phaddd1:
+; CHECK: vphaddd
+define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phaddd2:
+; CHECK: vphaddd
+define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+  %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phaddd3:
+; CHECK: vphaddd
+define <8 x i32> @phaddd3(<8 x i32> %x) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phsubw1:
+; CHECK: vphsubw
+define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+  %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %r = sub <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phsubd1:
+; CHECK: vphsubd
+define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = sub <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phsubd2:
+; CHECK: vphsubd
+define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 undef, i32 9, i32 11, i32 5, i32 7, i32 undef, i32 15>
+  %r = sub <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
new file mode 100644
index 0000000..1f192a0
--- /dev/null
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -0,0 +1,268 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: variable_shl0
+; CHECK: psllvd
+; CHECK: ret
+define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
+  %k = shl <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_shl1
+; CHECK: psllvd
+; CHECK: ret
+define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
+  %k = shl <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+; CHECK: variable_shl2
+; CHECK: psllvq
+; CHECK: ret
+define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
+  %k = shl <2 x i64> %x, %y
+  ret <2 x i64> %k
+}
+; CHECK: variable_shl3
+; CHECK: psllvq
+; CHECK: ret
+define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
+  %k = shl <4 x i64> %x, %y
+  ret <4 x i64> %k
+}
+; CHECK: variable_srl0
+; CHECK: psrlvd
+; CHECK: ret
+define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
+  %k = lshr <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_srl1
+; CHECK: psrlvd
+; CHECK: ret
+define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
+  %k = lshr <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+; CHECK: variable_srl2
+; CHECK: psrlvq
+; CHECK: ret
+define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
+  %k = lshr <2 x i64> %x, %y
+  ret <2 x i64> %k
+}
+; CHECK: variable_srl3
+; CHECK: psrlvq
+; CHECK: ret
+define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
+  %k = lshr <4 x i64> %x, %y
+  ret <4 x i64> %k
+}
+
+; CHECK: variable_sra0
+; CHECK: vpsravd
+; CHECK: ret
+define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
+  %k = ashr <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_sra1
+; CHECK: vpsravd
+; CHECK: ret
+define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
+  %k = ashr <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+
+;;; Shift left
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: variable_sra0_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = ashr <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+
+; CHECK: variable_sra1_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = ashr <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+
+; CHECK: variable_shl0_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = shl <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+; CHECK: variable_shl1_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = shl <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+; CHECK: variable_shl2_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
+  %y1 = load <2 x i64>* %y
+  %k = shl <2 x i64> %x, %y1
+  ret <2 x i64> %k
+}
+; CHECK: variable_shl3_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
+  %y1 = load <4 x i64>* %y
+  %k = shl <4 x i64> %x, %y1
+  ret <4 x i64> %k
+}
+; CHECK: variable_srl0_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = lshr <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+; CHECK: variable_srl1_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = lshr <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+; CHECK: variable_srl2_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
+  %y1 = load <2 x i64>* %y
+  %k = lshr <2 x i64> %x, %y1
+  ret <2 x i64> %k
+}
+; CHECK: variable_srl3_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
+  %y1 = load <4 x i64>* %y
+  %k = lshr <4 x i64> %x, %y1
+  ret <4 x i64> %k
+}
+
+define <32 x i8> @shl9(<32 x i8> %A) nounwind {
+  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shl9:
+; CHECK: vpsllw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @shr9(<32 x i8> %A) nounwind {
+  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shr9:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8_7:
+; CHECK: vpxor
+; CHECK: vpcmpgtb
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: vpxor
+; CHECK: vpsubb
+; CHECK: ret
+}
+
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK-NOT: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+  %b = trunc <16 x i16> %a to <16 x i8>
+  %c = sext <16 x i8> %b to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK-NOT: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
new file mode 100644
index 0000000..6d17443
--- /dev/null
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckldq
+define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpcklqdq
+define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
new file mode 100644
index 0000000..1a78414
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpbroadcastb (%
+define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i8* %ptr, align 4
+  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
+  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
+  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
+  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
+  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
+  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
+  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
+  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
+  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
+  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
+  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
+  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
+  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
+  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
+  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
+  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
+  ret <16 x i8> %qf
+}
+; CHECK: vpbroadcastb (%
+define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i8* %ptr, align 4
+  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
+  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
+  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
+  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
+  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
+  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
+  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
+  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
+  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
+  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
+  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
+  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
+  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
+  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
+  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
+  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
+
+  %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
+  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
+  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
+  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
+  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
+  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
+  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
+  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
+  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
+  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
+  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
+  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
+  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
+  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
+  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
+  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
+  ret <32 x i8> %q2f
+}
+; CHECK: vpbroadcastw (%
+
+define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i16* %ptr, align 4
+  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
+  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
+  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
+  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
+  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
+  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
+  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
+  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
+  ret <8 x i16> %q7
+}
+; CHECK: vpbroadcastw (%
+define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i16* %ptr, align 4
+  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
+  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
+  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
+  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
+  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
+  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
+  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
+  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
+  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
+  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
+  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
+  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
+  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
+  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
+  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
+  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
+  ret <16 x i16> %qf
+}
+; CHECK: vpbroadcastd (%
+define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
+  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
+  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
+  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
+  ret <4 x i32> %q3
+}
+; CHECK: vpbroadcastd (%
+define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
+  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
+  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
+  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
+  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
+  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
+  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
+  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
+  ret <8 x i32> %q7
+}
+; CHECK: vpbroadcastq (%
+define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i64* %ptr, align 4
+  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
+  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
+  ret <2 x i64> %q1
+}
+; CHECK: vpbroadcastq (%
+define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i64* %ptr, align 4
+  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
+  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
+  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
+  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
+  ret <4 x i64> %q3
+}
+
+; make sure that we still don't support broadcast double into 128-bit vector
+; this used to crash
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
+
+; CHECK: V111
+; CHECK: vpbroadcastd
+; CHECK: ret
+define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+entry:
+  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %g
+}
+
+; CHECK: _e2
+; CHECK: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
+  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
+  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+  ret <8 x i8> %vecinit7.i
+}
diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll
new file mode 100644
index 0000000..1937db5
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm2i128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vperm2i128 $17
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+; CHECK: vperm2i128 $3
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vperm2i128 $49
+define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vperm2i128 $2
+define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
+
+; CHECK: vperm2i128 $2, (%
+define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
+entry:
+  %c = load <16 x i16>* %a
+  %d = load <16 x i16>* %b
+  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
index ac972a8..ceabcb7 100644
--- a/test/CodeGen/X86/bc-extract.ll
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -11,7 +11,7 @@ entry:
 
 define float @extractFloat2() nounwind {
 entry:
-  ; CHECK: pxor	%xmm0, %xmm0
+  ; CHECK: xorps	%xmm0, %xmm0
   %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
   %tmp5 = extractelement <2 x float> %tmp4, i32 1
   ret float %tmp5
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
new file mode 100644
index 0000000..3a10c70
--- /dev/null
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+
+; In this test we check that sign-extend of the mask bit is performed by
+; shifting the needed bit to the MSB, and not using shl+sra.
+
+;CHECK: vsel_float
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+;CHECK: vsel_4xi8
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+  ret <4 x i8> %vsel
+}
+
+
+; We do not have native support for v8i16 blends and we have to use the
+; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r
+; reduce the mask in this case.
+;CHECK: vsel_8xi16
+;CHECK: psllw
+;CHECK: psraw
+;CHECK: pblendvb
+;CHECK: ret
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+  ret <8 x i16> %vsel
+}
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
new file mode 100644
index 0000000..167d522
--- /dev/null
+++ b/test/CodeGen/X86/block-placement.ll
@@ -0,0 +1,930 @@
+; RUN: llc -mtriple=i686-linux -enable-block-placement < %s | FileCheck %s
+
+declare void @error(i32 %i, i32 %a, i32 %b)
+
+define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
+; Test a chain of ifs, where the block guarded by the if is error handling code
+; that is not expected to run.
+; CHECK: test_ifchains:
+; CHECK: %entry
+; CHECK: %else1
+; CHECK: %else2
+; CHECK: %else3
+; CHECK: %else4
+; CHECK: %exit
+; CHECK: %then1
+; CHECK: %then2
+; CHECK: %then3
+; CHECK: %then4
+; CHECK: %then5
+
+entry:
+  %gep1 = getelementptr i32* %a, i32 1
+  %val1 = load i32* %gep1
+  %cond1 = icmp ugt i32 %val1, 1
+  br i1 %cond1, label %then1, label %else1, !prof !0
+
+then1:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else1
+
+else1:
+  %gep2 = getelementptr i32* %a, i32 2
+  %val2 = load i32* %gep2
+  %cond2 = icmp ugt i32 %val2, 2
+  br i1 %cond2, label %then2, label %else2, !prof !0
+
+then2:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else2
+
+else2:
+  %gep3 = getelementptr i32* %a, i32 3
+  %val3 = load i32* %gep3
+  %cond3 = icmp ugt i32 %val3, 3
+  br i1 %cond3, label %then3, label %else3, !prof !0
+
+then3:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else3
+
+else3:
+  %gep4 = getelementptr i32* %a, i32 4
+  %val4 = load i32* %gep4
+  %cond4 = icmp ugt i32 %val4, 4
+  br i1 %cond4, label %then4, label %else4, !prof !0
+
+then4:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else4
+
+else4:
+  %gep5 = getelementptr i32* %a, i32 3
+  %val5 = load i32* %gep5
+  %cond5 = icmp ugt i32 %val5, 3
+  br i1 %cond5, label %then5, label %exit, !prof !0
+
+then5:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %exit
+
+exit:
+  ret i32 %b
+}
+
+define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
+; Check that we sink cold loop blocks after the hot loop body.
+; CHECK: test_loop_cold_blocks:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %unlikely1
+; CHECK: %unlikely2
+; CHECK: %exit
+
+entry:
+  br label %body1
+
+body1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
+  %unlikelycond1 = icmp slt i32 %base, 42
+  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0
+
+unlikely1:
+  call void @error(i32 %i, i32 1, i32 %base)
+  br label %body2
+
+body2:
+  %unlikelycond2 = icmp sgt i32 %base, 21
+  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0
+
+unlikely2:
+  call void @error(i32 %i, i32 2, i32 %base)
+  br label %body3
+
+body3:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+exit:
+  ret i32 %sum
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+
+define i32 @test_loop_early_exits(i32 %i, i32* %a) {
+; Check that we sink early exit blocks out of loop bodies.
+; CHECK: test_loop_early_exits:
+; CHECK: %entry
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %body4
+; CHECK: %body1
+; CHECK: %bail1
+; CHECK: %bail2
+; CHECK: %bail3
+; CHECK: %exit
+
+entry:
+  br label %body1
+
+body1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
+  %bailcond1 = icmp eq i32 %base, 42
+  br i1 %bailcond1, label %bail1, label %body2
+
+bail1:
+  ret i32 -1
+
+body2:
+  %bailcond2 = icmp eq i32 %base, 43
+  br i1 %bailcond2, label %bail2, label %body3
+
+bail2:
+  ret i32 -2
+
+body3:
+  %bailcond3 = icmp eq i32 %base, 44
+  br i1 %bailcond3, label %bail3, label %body4
+
+bail3:
+  ret i32 -3
+
+body4:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @test_loop_rotate(i32 %i, i32* %a) {
+; Check that we rotate conditional exits from the loop to the bottom of the
+; loop, eliminating unconditional branches to the top.
+; CHECK: test_loop_rotate:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body0
+; CHECK: %exit
+
+entry:
+  br label %body0
+
+body0:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+body1:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %bailcond1 = icmp eq i32 %sum, 42
+  br label %body0
+
+exit:
+  ret i32 %base
+}
+
+define void @test_loop_rotate_reversed_blocks() {
+; This test case (greatly reduced from an Olden bencmark) ensures that the loop
+; rotate implementation doesn't assume that loops are laid out in a particular
+; order. The first loop will get split into two basic blocks, with the loop
+; header coming after the loop latch.
+;
+; CHECK: test_loop_rotate_reversed_blocks
+; CHECK: %entry
+; Look for a jump into the middle of the loop, and no branches mid-way.
+; CHECK: jmp
+; CHECK: %loop1
+; CHECK-NOT: j{{\w*}} .LBB{{.*}}
+; CHECK: %loop1
+; CHECK: je
+
+entry:
+  %cond1 = load volatile i1* undef
+  br i1 %cond1, label %loop2.preheader, label %loop1
+
+loop1:
+  call i32 @f()
+  %cond2 = load volatile i1* undef
+  br i1 %cond2, label %loop2.preheader, label %loop1
+
+loop2.preheader:
+  call i32 @f()
+  %cond3 = load volatile i1* undef
+  br i1 %cond3, label %exit, label %loop2
+
+loop2:
+  call i32 @f()
+  %cond4 = load volatile i1* undef
+  br i1 %cond4, label %exit, label %loop2
+
+exit:
+  ret void
+}
+
+define i32 @test_loop_align(i32 %i, i32* %a) {
+; Check that we provide basic loop body alignment with the block placement
+; pass.
+; CHECK: test_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN:[0-9]+]],
+; CHECK-NEXT: %body
+; CHECK: %exit
+
+entry:
+  br label %body
+
+body:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
+; Check that we provide nested loop body alignment.
+; CHECK: test_nested_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %loop.body.1
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %inner.loop.body
+; CHECK-NOT: .align
+; CHECK: %exit
+
+entry:
+  br label %loop.body.1
+
+loop.body.1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %bidx = load i32* %arrayidx
+  br label %inner.loop.body
+
+inner.loop.body:
+  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
+  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
+  %scaled_idx = mul i32 %bidx, %iv
+  %inner.arrayidx = getelementptr inbounds i32* %b, i32 %scaled_idx
+  %0 = load i32* %inner.arrayidx
+  %sum = add nsw i32 %0, %base
+  %inner.next = add i32 %iv, 1
+  %inner.exitcond = icmp eq i32 %inner.next, %i
+  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body
+
+loop.body.2:
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %loop.body.1
+
+exit:
+  ret i32 %sum
+}
+
+define void @unnatural_cfg1() {
+; Test that we can handle a loop with an inner unnatural loop at the end of
+; a function. This is a gross CFG reduced out of the single source GCC.
+; CHECK: unnatural_cfg1
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.body3
+
+entry:
+  br label %loop.header
+
+loop.header:
+  br label %loop.body1
+
+loop.body1:
+  br i1 undef, label %loop.body3, label %loop.body2
+
+loop.body2:
+  %ptr = load i32** undef, align 4
+  br label %loop.body3
+
+loop.body3:
+  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
+  %bcmyptr = bitcast i32* %myptr to i32*
+  %val = load i32* %bcmyptr, align 4
+  %comp = icmp eq i32 %val, 48
+  br i1 %comp, label %loop.body4, label %loop.body5
+
+loop.body4:
+  br i1 undef, label %loop.header, label %loop.body5
+
+loop.body5:
+  %ptr2 = load i32** undef, align 4
+  br label %loop.body3
+}
+
+define void @unnatural_cfg2() {
+; Test that we can handle a loop with a nested natural loop *and* an unnatural
+; loop. This was reduced from a crash on block placement when run over
+; single-source GCC.
+; CHECK: unnatural_cfg2
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.header
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
+; The end block is folded with %loop.body3...
+; CHECK-NOT: %loop.inner1.end
+; CHECK: %loop.body4
+; CHECK: %loop.inner2.begin
+; The loop.inner2.end block is folded
+; CHECK: %bail
+
+entry:
+  br label %loop.header
+
+loop.header:
+  %comp0 = icmp eq i32* undef, null
+  br i1 %comp0, label %bail, label %loop.body1
+
+loop.body1:
+  %val0 = load i32** undef, align 4
+  br i1 undef, label %loop.body2, label %loop.inner1.begin
+
+loop.body2:
+  br i1 undef, label %loop.body4, label %loop.body3
+
+loop.body3:
+  %ptr1 = getelementptr inbounds i32* %val0, i32 0
+  %castptr1 = bitcast i32* %ptr1 to i32**
+  %val1 = load i32** %castptr1, align 4
+  br label %loop.inner1.begin
+
+loop.inner1.begin:
+  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
+  %castval = bitcast i32* %valphi to i32*
+  %comp1 = icmp eq i32 undef, 48
+  br i1 %comp1, label %loop.inner1.end, label %loop.body4
+
+loop.inner1.end:
+  %ptr2 = getelementptr inbounds i32* %valphi, i32 0
+  %castptr2 = bitcast i32* %ptr2 to i32**
+  %val2 = load i32** %castptr2, align 4
+  br label %loop.inner1.begin
+
+loop.body4.dead:
+  br label %loop.body4
+
+loop.body4:
+  %comp2 = icmp ult i32 undef, 3
+  br i1 %comp2, label %loop.inner2.begin, label %loop.end
+
+loop.inner2.begin:
+  br i1 false, label %loop.end, label %loop.inner2.end
+
+loop.inner2.end:
+  %comp3 = icmp eq i32 undef, 1769472
+  br i1 %comp3, label %loop.end, label %loop.inner2.begin
+
+loop.end:
+  br label %loop.header
+
+bail:
+  unreachable
+}
+
+define i32 @problematic_switch() {
+; This function's CFG caused overlow in the machine branch probability
+; calculation, triggering asserts. Make sure we don't crash on it.
+; CHECK: problematic_switch
+
+entry:
+  switch i32 undef, label %exit [
+    i32 879, label %bogus
+    i32 877, label %step
+    i32 876, label %step
+    i32 875, label %step
+    i32 874, label %step
+    i32 873, label %step
+    i32 872, label %step
+    i32 868, label %step
+    i32 867, label %step
+    i32 866, label %step
+    i32 861, label %step
+    i32 860, label %step
+    i32 856, label %step
+    i32 855, label %step
+    i32 854, label %step
+    i32 831, label %step
+    i32 830, label %step
+    i32 829, label %step
+    i32 828, label %step
+    i32 815, label %step
+    i32 814, label %step
+    i32 811, label %step
+    i32 806, label %step
+    i32 805, label %step
+    i32 804, label %step
+    i32 803, label %step
+    i32 802, label %step
+    i32 801, label %step
+    i32 800, label %step
+    i32 799, label %step
+    i32 798, label %step
+    i32 797, label %step
+    i32 796, label %step
+    i32 795, label %step
+  ]
+bogus:
+  unreachable
+step:
+  br label %exit
+exit:
+  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
+  ret i32 %merge
+}
+
+define void @fpcmp_unanalyzable_branch(i1 %cond) {
+; This function's CFG contains an unanalyzable branch that is likely to be
+; split due to having a different high-probability predecessor.
+; CHECK: fpcmp_unanalyzable_branch
+; CHECK: %entry
+; CHECK: %exit
+; CHECK-NOT: %if.then
+; CHECK-NOT: %if.end
+; CHECK-NOT: jne
+; CHECK-NOT: jnp
+; CHECK: jne
+; CHECK-NEXT: jnp
+; CHECK-NEXT: %if.then
+
+entry:
+; Note that this branch must be strongly biased toward
+; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
+; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
+; chain which would violate the unanalyzable branch in 'exit', but we won't even
+; try this trick unless 'if.then' is believed to almost always be reached from
+; 'entry.if.then_crit_edge'.
+  br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
+
+entry.if.then_crit_edge:
+  %.pre14 = load i8* undef, align 1, !tbaa !0
+  br label %if.then
+
+lor.lhs.false:
+  br i1 undef, label %if.end, label %exit
+
+exit:
+  %cmp.i = fcmp une double 0.000000e+00, undef
+  br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+  %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
+  %1 = and i8 %0, 1
+  store i8 %1, i8* undef, align 4, !tbaa !0
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+
+declare i32 @f()
+declare i32 @g()
+declare i32 @h(i32 %x)
+
+define i32 @test_global_cfg_break_profitability() {
+; Check that our metrics for the profitability of a CFG break are global rather
+; than local. A successor may be very hot, but if the current block isn't, it
+; doesn't matter. Within this test the 'then' block is slightly warmer than the
+; 'else' block, but not nearly enough to merit merging it with the exit block
+; even though the probability of 'then' branching to the 'exit' block is very
+; high.
+; CHECK: test_global_cfg_break_profitability
+; CHECK: calll {{_?}}f
+; CHECK: calll {{_?}}g
+; CHECK: calll {{_?}}h
+; CHECK: ret
+
+entry:
+  br i1 undef, label %then, label %else, !prof !2
+
+then:
+  %then.result = call i32 @f()
+  br label %exit
+
+else:
+  %else.result = call i32 @g()
+  br label %exit
+
+exit:
+  %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
+  %result2 = call i32 @h(i32 %result)
+  ret i32 %result
+}
+
+!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @test_eh_lpad_successor() {
+; Some times the landing pad ends up as the first successor of an invoke block.
+; When this happens, a strange result used to fall out of updateTerminators: we
+; didn't correctly locate the fallthrough successor, assuming blindly that the
+; first one was the fallthrough successor. As a result, we would add an
+; erroneous jump to the landing pad thinking *that* was the default successor.
+; CHECK: test_eh_lpad_successor
+; CHECK: %entry
+; CHECK-NOT: jmp
+; CHECK: %loop
+
+entry:
+  invoke i32 @f() to label %preheader unwind label %lpad
+
+preheader:
+  br label %loop
+
+lpad:
+  %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %lpad.val
+
+loop:
+  br label %loop
+}
+
+declare void @fake_throw() noreturn
+
+define void @test_eh_throw() {
+; For blocks containing a 'throw' (or similar functionality), we have
+; a no-return invoke. In this case, only EH successors will exist, and
+; fallthrough simply won't occur. Make sure we don't crash trying to update
+; terminators for such constructs.
+;
+; CHECK: test_eh_throw
+; CHECK: %entry
+; CHECK: %cleanup
+
+entry:
+  invoke void @fake_throw() to label %continue unwind label %cleanup
+
+continue:
+  unreachable
+
+cleanup:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  unreachable
+}
+
+define void @test_unnatural_cfg_backwards_inner_loop() {
+; Test that when we encounter an unnatural CFG structure after having formed
+; a chain for an inner loop which happened to be laid out backwards we don't
+; attempt to merge onto the wrong end of the inner loop just because we find it
+; first. This was reduced from a crasher in GCC's single source.
+;
+; CHECK: test_unnatural_cfg_backwards_inner_loop
+; CHECK: %entry
+; CHECK: %body
+; CHECK: %loop2b
+; CHECK: %loop1
+; CHECK: %loop2a
+
+entry:
+  br i1 undef, label %loop2a, label %body
+
+body:
+  br label %loop2a
+
+loop1:
+  %next.load = load i32** undef
+  br i1 %comp.a, label %loop2a, label %loop2b
+
+loop2a:
+  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
+  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
+  %comp.a = icmp eq i32* %var, null
+  br label %loop3
+
+loop2b:
+  %gep = getelementptr inbounds i32* %var.phi, i32 0
+  %next.ptr = bitcast i32* %gep to i32**
+  store i32* %next.phi, i32** %next.ptr
+  br label %loop3
+
+loop3:
+  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+  br label %loop1
+}
+
+define void @unanalyzable_branch_to_loop_header() {
+; Ensure that we can handle unanalyzable branches into loop headers. We
+; pre-form chains for unanalyzable branches, and will find the tail end of that
+; at the start of the loop. This function uses floating point comparison
+; fallthrough because that happens to always produce unanalyzable branches on
+; x86.
+;
+; CHECK: unanalyzable_branch_to_loop_header
+; CHECK: %entry
+; CHECK: %loop
+; CHECK: %exit
+
+entry:
+  %cmp = fcmp une double 0.000000e+00, undef
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+  %cond = icmp eq i8 undef, 42
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @unanalyzable_branch_to_best_succ(i1 %cond) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the optimal sucessor to merge.
+;
+; CHECK: unanalyzable_branch_to_best_succ
+; CHECK: %entry
+; CHECK: %foo
+; CHECK: %bar
+; CHECK: %exit
+
+entry:
+  ; Bias this branch toward bar to ensure we form that chain.
+  br i1 %cond, label %bar, label %foo, !prof !1
+
+foo:
+  %cmp = fcmp une double 0.000000e+00, undef
+  br i1 %cmp, label %bar, label %exit
+
+bar:
+  call i32 @f()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @unanalyzable_branch_to_free_block(float %x) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the best free block in the CFG.
+;
+; CHECK: unanalyzable_branch_to_free_block
+; CHECK: %entry
+; CHECK: %a
+; CHECK: %b
+; CHECK: %c
+; CHECK: %exit
+
+entry:
+  br i1 undef, label %a, label %b
+
+a:
+  call i32 @f()
+  br label %c
+
+b:
+  %cmp = fcmp une float %x, undef
+  br i1 %cmp, label %c, label %exit
+
+c:
+  call i32 @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @many_unanalyzable_branches() {
+; Ensure that we don't crash as we're building up many unanalyzable branches,
+; blocks, and loops.
+;
+; CHECK: many_unanalyzable_branches
+; CHECK: %entry
+; CHECK: %exit
+
+entry:
+  br label %0
+
+  %val0 = load volatile float* undef
+  %cmp0 = fcmp une float %val0, undef
+  br i1 %cmp0, label %1, label %0
+  %val1 = load volatile float* undef
+  %cmp1 = fcmp une float %val1, undef
+  br i1 %cmp1, label %2, label %1
+  %val2 = load volatile float* undef
+  %cmp2 = fcmp une float %val2, undef
+  br i1 %cmp2, label %3, label %2
+  %val3 = load volatile float* undef
+  %cmp3 = fcmp une float %val3, undef
+  br i1 %cmp3, label %4, label %3
+  %val4 = load volatile float* undef
+  %cmp4 = fcmp une float %val4, undef
+  br i1 %cmp4, label %5, label %4
+  %val5 = load volatile float* undef
+  %cmp5 = fcmp une float %val5, undef
+  br i1 %cmp5, label %6, label %5
+  %val6 = load volatile float* undef
+  %cmp6 = fcmp une float %val6, undef
+  br i1 %cmp6, label %7, label %6
+  %val7 = load volatile float* undef
+  %cmp7 = fcmp une float %val7, undef
+  br i1 %cmp7, label %8, label %7
+  %val8 = load volatile float* undef
+  %cmp8 = fcmp une float %val8, undef
+  br i1 %cmp8, label %9, label %8
+  %val9 = load volatile float* undef
+  %cmp9 = fcmp une float %val9, undef
+  br i1 %cmp9, label %10, label %9
+  %val10 = load volatile float* undef
+  %cmp10 = fcmp une float %val10, undef
+  br i1 %cmp10, label %11, label %10
+  %val11 = load volatile float* undef
+  %cmp11 = fcmp une float %val11, undef
+  br i1 %cmp11, label %12, label %11
+  %val12 = load volatile float* undef
+  %cmp12 = fcmp une float %val12, undef
+  br i1 %cmp12, label %13, label %12
+  %val13 = load volatile float* undef
+  %cmp13 = fcmp une float %val13, undef
+  br i1 %cmp13, label %14, label %13
+  %val14 = load volatile float* undef
+  %cmp14 = fcmp une float %val14, undef
+  br i1 %cmp14, label %15, label %14
+  %val15 = load volatile float* undef
+  %cmp15 = fcmp une float %val15, undef
+  br i1 %cmp15, label %16, label %15
+  %val16 = load volatile float* undef
+  %cmp16 = fcmp une float %val16, undef
+  br i1 %cmp16, label %17, label %16
+  %val17 = load volatile float* undef
+  %cmp17 = fcmp une float %val17, undef
+  br i1 %cmp17, label %18, label %17
+  %val18 = load volatile float* undef
+  %cmp18 = fcmp une float %val18, undef
+  br i1 %cmp18, label %19, label %18
+  %val19 = load volatile float* undef
+  %cmp19 = fcmp une float %val19, undef
+  br i1 %cmp19, label %20, label %19
+  %val20 = load volatile float* undef
+  %cmp20 = fcmp une float %val20, undef
+  br i1 %cmp20, label %21, label %20
+  %val21 = load volatile float* undef
+  %cmp21 = fcmp une float %val21, undef
+  br i1 %cmp21, label %22, label %21
+  %val22 = load volatile float* undef
+  %cmp22 = fcmp une float %val22, undef
+  br i1 %cmp22, label %23, label %22
+  %val23 = load volatile float* undef
+  %cmp23 = fcmp une float %val23, undef
+  br i1 %cmp23, label %24, label %23
+  %val24 = load volatile float* undef
+  %cmp24 = fcmp une float %val24, undef
+  br i1 %cmp24, label %25, label %24
+  %val25 = load volatile float* undef
+  %cmp25 = fcmp une float %val25, undef
+  br i1 %cmp25, label %26, label %25
+  %val26 = load volatile float* undef
+  %cmp26 = fcmp une float %val26, undef
+  br i1 %cmp26, label %27, label %26
+  %val27 = load volatile float* undef
+  %cmp27 = fcmp une float %val27, undef
+  br i1 %cmp27, label %28, label %27
+  %val28 = load volatile float* undef
+  %cmp28 = fcmp une float %val28, undef
+  br i1 %cmp28, label %29, label %28
+  %val29 = load volatile float* undef
+  %cmp29 = fcmp une float %val29, undef
+  br i1 %cmp29, label %30, label %29
+  %val30 = load volatile float* undef
+  %cmp30 = fcmp une float %val30, undef
+  br i1 %cmp30, label %31, label %30
+  %val31 = load volatile float* undef
+  %cmp31 = fcmp une float %val31, undef
+  br i1 %cmp31, label %32, label %31
+  %val32 = load volatile float* undef
+  %cmp32 = fcmp une float %val32, undef
+  br i1 %cmp32, label %33, label %32
+  %val33 = load volatile float* undef
+  %cmp33 = fcmp une float %val33, undef
+  br i1 %cmp33, label %34, label %33
+  %val34 = load volatile float* undef
+  %cmp34 = fcmp une float %val34, undef
+  br i1 %cmp34, label %35, label %34
+  %val35 = load volatile float* undef
+  %cmp35 = fcmp une float %val35, undef
+  br i1 %cmp35, label %36, label %35
+  %val36 = load volatile float* undef
+  %cmp36 = fcmp une float %val36, undef
+  br i1 %cmp36, label %37, label %36
+  %val37 = load volatile float* undef
+  %cmp37 = fcmp une float %val37, undef
+  br i1 %cmp37, label %38, label %37
+  %val38 = load volatile float* undef
+  %cmp38 = fcmp une float %val38, undef
+  br i1 %cmp38, label %39, label %38
+  %val39 = load volatile float* undef
+  %cmp39 = fcmp une float %val39, undef
+  br i1 %cmp39, label %40, label %39
+  %val40 = load volatile float* undef
+  %cmp40 = fcmp une float %val40, undef
+  br i1 %cmp40, label %41, label %40
+  %val41 = load volatile float* undef
+  %cmp41 = fcmp une float %val41, undef
+  br i1 %cmp41, label %42, label %41
+  %val42 = load volatile float* undef
+  %cmp42 = fcmp une float %val42, undef
+  br i1 %cmp42, label %43, label %42
+  %val43 = load volatile float* undef
+  %cmp43 = fcmp une float %val43, undef
+  br i1 %cmp43, label %44, label %43
+  %val44 = load volatile float* undef
+  %cmp44 = fcmp une float %val44, undef
+  br i1 %cmp44, label %45, label %44
+  %val45 = load volatile float* undef
+  %cmp45 = fcmp une float %val45, undef
+  br i1 %cmp45, label %46, label %45
+  %val46 = load volatile float* undef
+  %cmp46 = fcmp une float %val46, undef
+  br i1 %cmp46, label %47, label %46
+  %val47 = load volatile float* undef
+  %cmp47 = fcmp une float %val47, undef
+  br i1 %cmp47, label %48, label %47
+  %val48 = load volatile float* undef
+  %cmp48 = fcmp une float %val48, undef
+  br i1 %cmp48, label %49, label %48
+  %val49 = load volatile float* undef
+  %cmp49 = fcmp une float %val49, undef
+  br i1 %cmp49, label %50, label %49
+  %val50 = load volatile float* undef
+  %cmp50 = fcmp une float %val50, undef
+  br i1 %cmp50, label %51, label %50
+  %val51 = load volatile float* undef
+  %cmp51 = fcmp une float %val51, undef
+  br i1 %cmp51, label %52, label %51
+  %val52 = load volatile float* undef
+  %cmp52 = fcmp une float %val52, undef
+  br i1 %cmp52, label %53, label %52
+  %val53 = load volatile float* undef
+  %cmp53 = fcmp une float %val53, undef
+  br i1 %cmp53, label %54, label %53
+  %val54 = load volatile float* undef
+  %cmp54 = fcmp une float %val54, undef
+  br i1 %cmp54, label %55, label %54
+  %val55 = load volatile float* undef
+  %cmp55 = fcmp une float %val55, undef
+  br i1 %cmp55, label %56, label %55
+  %val56 = load volatile float* undef
+  %cmp56 = fcmp une float %val56, undef
+  br i1 %cmp56, label %57, label %56
+  %val57 = load volatile float* undef
+  %cmp57 = fcmp une float %val57, undef
+  br i1 %cmp57, label %58, label %57
+  %val58 = load volatile float* undef
+  %cmp58 = fcmp une float %val58, undef
+  br i1 %cmp58, label %59, label %58
+  %val59 = load volatile float* undef
+  %cmp59 = fcmp une float %val59, undef
+  br i1 %cmp59, label %60, label %59
+  %val60 = load volatile float* undef
+  %cmp60 = fcmp une float %val60, undef
+  br i1 %cmp60, label %61, label %60
+  %val61 = load volatile float* undef
+  %cmp61 = fcmp une float %val61, undef
+  br i1 %cmp61, label %62, label %61
+  %val62 = load volatile float* undef
+  %cmp62 = fcmp une float %val62, undef
+  br i1 %cmp62, label %63, label %62
+  %val63 = load volatile float* undef
+  %cmp63 = fcmp une float %val63, undef
+  br i1 %cmp63, label %64, label %63
+  %val64 = load volatile float* undef
+  %cmp64 = fcmp une float %val64, undef
+  br i1 %cmp64, label %65, label %64
+
+  br label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 88c09e3..43c47c0 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,65 @@
-; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-       %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
-       ret i32 %tmp
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
+  ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: tzcntl
 }
 
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-       %tmp = tail call i16 @llvm.cttz.i16( i16 %x )
-       ret i16 %tmp
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
+  ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: tzcntw
 }
 
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-define i64 @t3(i64 %x) nounwind  {
-       %tmp = tail call i64 @llvm.cttz.i64( i64 %x )
-       ret i64 %tmp
+define i32 @t3(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
+  ret i32 %tmp
 ; CHECK: t3:
+; CHECK: tzcntl
+}
+
+define i64 @t4(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
+  ret i64 %tmp
+; CHECK: t4:
 ; CHECK: tzcntq
 }
 
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+define i8 @t5(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: t5:
+; CHECK: tzcntl
+}
 
-define i8 @t4(i8 %x) nounwind  {
-       %tmp = tail call i8 @llvm.cttz.i8( i8 %x )
-       ret i8 %tmp
-; CHECK: t4:
+define i16 @t6(i16 %x) nounwind  {
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: t6:
 ; CHECK: tzcntw
 }
 
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: t7:
+; CHECK: tzcntl
+}
+
+define i64 @t8(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: t8:
+; CHECK: tzcntq
+}
 
 define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
@@ -51,3 +76,124 @@ define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
 ; CHECK: andn64:
 ; CHECK: andnq
 }
+
+define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: bextr32:
+; CHECK: bextrl
+}
+
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+
+define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: bextr64:
+; CHECK: bextrq
+}
+
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
+
+define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: bzhi32:
+; CHECK: bzhil
+}
+
+declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
+
+define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: bzhi64:
+; CHECK: bzhiq
+}
+
+declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+
+define i32 @blsi32(i32 %x) nounwind readnone {
+  %tmp = sub i32 0, %x
+  %tmp2 = and i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsi32:
+; CHECK: blsil
+}
+
+define i64 @blsi64(i64 %x) nounwind readnone {
+  %tmp = sub i64 0, %x
+  %tmp2 = and i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsi64:
+; CHECK: blsiq
+}
+
+define i32 @blsmsk32(i32 %x) nounwind readnone {
+  %tmp = sub i32 %x, 1
+  %tmp2 = xor i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsmsk32:
+; CHECK: blsmskl
+}
+
+define i64 @blsmsk64(i64 %x) nounwind readnone {
+  %tmp = sub i64 %x, 1
+  %tmp2 = xor i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsmsk64:
+; CHECK: blsmskq
+}
+
+define i32 @blsr32(i32 %x) nounwind readnone {
+  %tmp = sub i32 %x, 1
+  %tmp2 = and i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsr32:
+; CHECK: blsrl
+}
+
+define i64 @blsr64(i64 %x) nounwind readnone {
+  %tmp = sub i64 %x, 1
+  %tmp2 = and i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsr64:
+; CHECK: blsrq
+}
+
+define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: pdep32:
+; CHECK: pdepl
+}
+
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+
+define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: pdep64:
+; CHECK: pdepq
+}
+
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
+
+define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: pext32:
+; CHECK: pextl
+}
+
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+
+define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: pext64:
+; CHECK: pextq
+}
+
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 5cdc100..44670c8 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
+
 ; rdar://7475489
 
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -106,3 +107,4 @@ bb2:                                              ; preds = %entry, %bb1
   %.0 = fptrunc double %.0.in to float            ; <float> [#uses=1]
   ret float %.0
 }
+
diff --git a/test/CodeGen/X86/btq.ll b/test/CodeGen/X86/btq.ll
new file mode 100644
index 0000000..9c137a7
--- /dev/null
+++ b/test/CodeGen/X86/btq.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @bar()
+
+define void @test1(i64 %foo) nounwind {
+  %and = and i64 %foo, 4294967296
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test1:
+; CHECK: btq $32
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test2(i64 %foo) nounwind {
+  %and = and i64 %foo, 2147483648
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test2:
+; CHECK: testl $-2147483648
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index b060369..2d39901 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep add | not grep 16
+; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16
 
 	%struct.W = type { x86_fp80, x86_fp80 }
 @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
new file mode 100644
index 0000000..7420ce7
--- /dev/null
+++ b/test/CodeGen/X86/cfstring.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; Make sure that the string ends up the the correct section.
+
+; CHECK:        .section __TEXT,__cstring
+; CHECK-NEXT: l_.str3:
+
+; CHECK:        .section  __DATA,__cfstring
+; CHECK-NEXT:   .align  4
+; CHECK-NEXT: L__unnamed_cfstring_4:
+; CHECK-NEXT:   .quad  ___CFConstantStringClassReference
+; CHECK-NEXT:   .long  1992
+; CHECK-NEXT:   .space  4
+; CHECK-NEXT:   .quad  l_.str3
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .space  4
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
deleted file mode 100644
index 439f7b0..0000000
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s
-;
-; Nested LSR is required to optimize this case.
-; We do not expect to see this form of IR without -enable-iv-rewrite.
-
-define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
-; CHECK: borf:
-; CHECK-NOT: inc
-; CHECK-NOT: leal 1(
-; CHECK-NOT: leal -1(
-; CHECK: decl
-; CHECK-NEXT: cmpl $-478
-; CHECK: ret
-
-bb4.thread:
-	br label %bb2.outer
-
-bb2.outer:		; preds = %bb4, %bb4.thread
-	%indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ]		; <i32> [#uses=3]
-	%tmp34 = mul i32 %indvar18, 65535		; <i32> [#uses=1]
-	%i.0.reg2mem.0.ph = add i32 %tmp34, 639		; <i32> [#uses=1]
-	%0 = and i32 %i.0.reg2mem.0.ph, 65535		; <i32> [#uses=1]
-	%1 = mul i32 %0, 480		; <i32> [#uses=1]
-	%tmp20 = mul i32 %indvar18, -478		; <i32> [#uses=1]
-	br label %bb2
-
-bb2:		; preds = %bb2, %bb2.outer
-	%indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ]		; <i32> [#uses=3]
-	%ctg2 = getelementptr i8* %out, i32 %tmp20		; <i8*> [#uses=1]
-	%tmp21 = ptrtoint i8* %ctg2 to i32		; <i32> [#uses=1]
-	%tmp23 = sub i32 %tmp21, %indvar		; <i32> [#uses=1]
-	%out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8*		; <i8*> [#uses=1]
-	%tmp25 = mul i32 %indvar, 65535		; <i32> [#uses=1]
-	%j.0.reg2mem.0 = add i32 %tmp25, 479		; <i32> [#uses=1]
-	%2 = and i32 %j.0.reg2mem.0, 65535		; <i32> [#uses=1]
-	%3 = add i32 %1, %2		; <i32> [#uses=9]
-	%4 = add i32 %3, -481		; <i32> [#uses=1]
-	%5 = getelementptr i8* %in, i32 %4		; <i8*> [#uses=1]
-	%6 = load i8* %5, align 1		; <i8> [#uses=1]
-	%7 = add i32 %3, -480		; <i32> [#uses=1]
-	%8 = getelementptr i8* %in, i32 %7		; <i8*> [#uses=1]
-	%9 = load i8* %8, align 1		; <i8> [#uses=1]
-	%10 = add i32 %3, -479		; <i32> [#uses=1]
-	%11 = getelementptr i8* %in, i32 %10		; <i8*> [#uses=1]
-	%12 = load i8* %11, align 1		; <i8> [#uses=1]
-	%13 = add i32 %3, -1		; <i32> [#uses=1]
-	%14 = getelementptr i8* %in, i32 %13		; <i8*> [#uses=1]
-	%15 = load i8* %14, align 1		; <i8> [#uses=1]
-	%16 = getelementptr i8* %in, i32 %3		; <i8*> [#uses=1]
-	%17 = load i8* %16, align 1		; <i8> [#uses=1]
-	%18 = add i32 %3, 1		; <i32> [#uses=1]
-	%19 = getelementptr i8* %in, i32 %18		; <i8*> [#uses=1]
-	%20 = load i8* %19, align 1		; <i8> [#uses=1]
-	%21 = add i32 %3, 481		; <i32> [#uses=1]
-	%22 = getelementptr i8* %in, i32 %21		; <i8*> [#uses=1]
-	%23 = load i8* %22, align 1		; <i8> [#uses=1]
-	%24 = add i32 %3, 480		; <i32> [#uses=1]
-	%25 = getelementptr i8* %in, i32 %24		; <i8*> [#uses=1]
-	%26 = load i8* %25, align 1		; <i8> [#uses=1]
-	%27 = add i32 %3, 479		; <i32> [#uses=1]
-	%28 = getelementptr i8* %in, i32 %27		; <i8*> [#uses=1]
-	%29 = load i8* %28, align 1		; <i8> [#uses=1]
-	%30 = add i8 %9, %6		; <i8> [#uses=1]
-	%31 = add i8 %30, %12		; <i8> [#uses=1]
-	%32 = add i8 %31, %15		; <i8> [#uses=1]
-	%33 = add i8 %32, %17		; <i8> [#uses=1]
-	%34 = add i8 %33, %20		; <i8> [#uses=1]
-	%35 = add i8 %34, %23		; <i8> [#uses=1]
-	%36 = add i8 %35, %26		; <i8> [#uses=1]
-	%37 = add i8 %36, %29		; <i8> [#uses=1]
-	store i8 %37, i8* %out_addr.0.reg2mem.0, align 1
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, 478		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb4, label %bb2
-
-bb4:		; preds = %bb2
-	%indvar.next28 = add i32 %indvar18, 1		; <i32> [#uses=2]
-	%exitcond29 = icmp eq i32 %indvar.next28, 638		; <i1> [#uses=1]
-	br i1 %exitcond29, label %return, label %bb2.outer
-
-return:		; preds = %bb4
-	ret void
-}
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 8b53ae2..1c5c113 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -3,6 +3,10 @@
 ; Nested LSR is required to optimize this case.
 ; We do not expect to see this form of IR without -enable-iv-rewrite.
 
+; xfailed for now because the scheduler two-address hack has been disabled.
+; Now it's generating a leal -1 rather than a decq.
+; XFAIL: *
+
 define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
 ; CHECK: borf:
 ; CHECK-NOT: inc
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index d76fab4..763079f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,141 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
-	ret i32 %tmp
-; CHECK: t1:
-; CHECK: bsrl
-; CHECK: cmov
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @cttz_i8(i8 %x)  {
+  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: cttz_i8:
+; CHECK: bsfl
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone 
+define i16 @cttz_i16(i16 %x)  {
+  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: cttz_i16:
+; CHECK: bsfw
+; CHECK-NOT: cmov
+; CHECK: ret
+}
 
-define i32 @t2(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.cttz.i32( i32 %x )
-	ret i32 %tmp
-; CHECK: t2:
+define i32 @cttz_i32(i32 %x)  {
+  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: cttz_i32:
 ; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @cttz_i64(i64 %x)  {
+  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: cttz_i64:
+; CHECK: bsfq
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.cttz.i32(i32) nounwind readnone 
+define i8 @ctlz_i8(i8 %x) {
+entry:
+  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
+  ret i8 %tmp2
+; CHECK: ctlz_i8:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $7,
+; CHECK: ret
+}
 
-define i16 @t3(i16 %x, i16 %y) nounwind  {
+define i16 @ctlz_i16(i16 %x) {
 entry:
-        %tmp1 = add i16 %x, %y
-	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 )		; <i16> [#uses=1]
-	ret i16 %tmp2
-; CHECK: t3:
+  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+  ret i16 %tmp2
+; CHECK: ctlz_i16:
 ; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $15,
+; CHECK: ret
+}
+
+define i32 @ctlz_i32(i32 %x) {
+  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: ctlz_i32:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+}
+
+define i64 @ctlz_i64(i64 %x) {
+  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: ctlz_i64:
+; CHECK: bsrq
+; CHECK-NOT: cmov
+; CHECK: xorq $63,
+; CHECK: ret
 }
 
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone 
+define i32 @ctlz_i32_cmov(i32 %n) {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: ctlz_i32_cmov:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  ret i32 %tmp1
+}
 
+define i32 @ctlz_i32_fold_cmov(i32 %n) {
+entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: ctlz_i32_fold_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
+; CHECK: xorl $31,
 ; CHECK: ret
   %or = or i32 %n, 1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
+
+define i32 @ctlz_bsr(i32 %n) {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: ctlz_bsr:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
+
+define i32 @ctlz_bsr_cmov(i32 %n) {
+entry:
+; Same as ctlz_bsr, but ensure this happens even when there is a potential
+; zero.
+; CHECK: ctlz_bsr_cmov:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 7a8d6e6..2e7ffbf 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -84,7 +84,7 @@ entry:
   br i1 %3, label %func_4.exit.i, label %bb.i.i.i
 
 bb.i.i.i:                                         ; preds = %entry
-  %4 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  %4 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_4.exit.i
 
 ; CHECK: test4:
@@ -101,7 +101,7 @@ func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
   br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
 
 bb.i.i:                                           ; preds = %func_4.exit.i
-  %5 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  %5 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_1.exit
 
 func_1.exit:                                      ; preds = %bb.i.i, %func_4.exit.i
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index ba1c4ef..edbd0bc 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -3,7 +3,7 @@
 ; Basic 128-bit cmpxchg
 define void @t1(i128* nocapture %p) nounwind ssp {
 entry:
-; CHECK movl	$1, %ebx
+; CHECK: movl	$1, %ebx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
   %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 8aa0bfd..d9e0778 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -21,6 +21,6 @@ bb:		; preds = %bb, %entry
 	br i1 %exitcond, label %bb13, label %bb
 
 bb13:		; preds = %bb
-	volatile store float %tmp6, float* @G, align 4
+	store volatile float %tmp6, float* @G, align 4
 	ret void
 }
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 1531457..cf6e27d 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -6,16 +6,16 @@
 ; Chain and flag folding issues.
 define i32 @test1() nounwind ssp {
 entry:
-  %tmp5.i = volatile load i32* undef              ; <i32> [#uses=1]
+  %tmp5.i = load volatile i32* undef              ; <i32> [#uses=1]
   %conv.i = zext i32 %tmp5.i to i64               ; <i64> [#uses=1]
-  %tmp12.i = volatile load i32* undef             ; <i32> [#uses=1]
+  %tmp12.i = load volatile i32* undef             ; <i32> [#uses=1]
   %conv13.i = zext i32 %tmp12.i to i64            ; <i64> [#uses=1]
   %shl.i = shl i64 %conv13.i, 32                  ; <i64> [#uses=1]
   %or.i = or i64 %shl.i, %conv.i                  ; <i64> [#uses=1]
   %add16.i = add i64 %or.i, 256                   ; <i64> [#uses=1]
   %shr.i = lshr i64 %add16.i, 8                   ; <i64> [#uses=1]
   %conv19.i = trunc i64 %shr.i to i32             ; <i32> [#uses=1]
-  volatile store i32 %conv19.i, i32* undef
+  store volatile i32 %conv19.i, i32* undef
   ret i32 undef
 }
 
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index 3a849aa..adf9854 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
+; RUN: llc -enable-dwarf-directory -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
 
 ; Radar 8884898
-; CHECK: file	1 "/Users/manav/one/two{{/|\\\\}}simple.c"
+; CHECK: file	1 "simple.c"
 
 declare i32 @printf(i8*, ...) nounwind
 
diff --git a/test/CodeGen/X86/dbg-inline.ll b/test/CodeGen/X86/dbg-inline.ll
deleted file mode 100644
index 523c62e..0000000
--- a/test/CodeGen/X86/dbg-inline.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Radar 7881628, 9747970
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-%class.APFloat = type { i32 }
-
-define i32 @_ZNK7APFloat9partCountEv(%class.APFloat* nocapture %this) nounwind uwtable readonly optsize ssp align 2 {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !28), !dbg !41
-  %prec = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !42
-  %tmp = load i32* %prec, align 4, !dbg !42, !tbaa !44
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !47), !dbg !48
-  %add.i = add i32 %tmp, 42, !dbg !49
-  ret i32 %add.i, !dbg !42
-}
-
-define zeroext i1 @_ZNK7APFloat14bitwiseIsEqualERKS_(%class.APFloat* %this, %class.APFloat* %rhs) uwtable optsize ssp align 2 {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !29), !dbg !51
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %rhs}, i64 0, metadata !30), !dbg !52
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !53), !dbg !55
-  %prec.i = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !56
-;CHECK: DW_TAG_inlined_subroutine
-;CHECK: DW_AT_abstract_origin
-;CHECK: DW_AT_ranges
-  %tmp.i = load i32* %prec.i, align 4, !dbg !56, !tbaa !44
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp.i}, i64 0, metadata !57), !dbg !58
-  %add.i.i = add i32 %tmp.i, 42, !dbg !59
-  tail call void @llvm.dbg.value(metadata !{i32 %add.i.i}, i64 0, metadata !31), !dbg !54
-  %call2 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %this) optsize, !dbg !60
-  tail call void @llvm.dbg.value(metadata !{i64* %call2}, i64 0, metadata !34), !dbg !60
-  %call3 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %rhs) optsize, !dbg !61
-  tail call void @llvm.dbg.value(metadata !{i64* %call3}, i64 0, metadata !37), !dbg !61
-  %tmp = zext i32 %add.i.i to i64
-  br label %for.cond, !dbg !62
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %tmp13 = sub i64 %tmp, %indvar, !dbg !62
-  %i.0 = trunc i64 %tmp13 to i32, !dbg !62
-  %cmp = icmp sgt i32 %i.0, 0, !dbg !62
-  br i1 %cmp, label %for.body, label %return, !dbg !62
-
-for.body:                                         ; preds = %for.cond
-  %p.0 = getelementptr i64* %call2, i64 %indvar, !dbg !63
-  %tmp6 = load i64* %p.0, align 8, !dbg !63, !tbaa !66
-  %tmp8 = load i64* %call3, align 8, !dbg !63, !tbaa !66
-  %cmp9 = icmp eq i64 %tmp6, %tmp8, !dbg !63
-  br i1 %cmp9, label %for.inc, label %return, !dbg !63
-
-for.inc:                                          ; preds = %for.body
-  %indvar.next = add i64 %indvar, 1, !dbg !67
-  br label %for.cond, !dbg !67
-
-return:                                           ; preds = %for.cond, %for.body
-  %retval.0 = phi i1 [ false, %for.body ], [ true, %for.cond ]
-  ret i1 %retval.0, !dbg !68
-}
-
-declare i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat*) optsize
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !7, !12, !23, !24, !25}
-!llvm.dbg.lv._ZNK7APFloat9partCountEv = !{!28}
-!llvm.dbg.lv._ZNK7APFloat14bitwiseIsEqualERKS_ = !{!29, !30, !31, !34, !37}
-!llvm.dbg.lv._ZL16partCountForBitsj = !{!38}
-!llvm.dbg.gv = !{!39}
-
-!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 136149)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 8, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 655362, metadata !0, metadata !"APFloat", metadata !3, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!3 = metadata !{i32 655401, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !1, metadata !7, metadata !12}
-!5 = metadata !{i32 655373, metadata !2, metadata !"prec", metadata !3, i32 13, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 655396, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 655406, i32 0, metadata !2, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 9, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{metadata !6, metadata !10}
-!10 = metadata !{i32 655375, metadata !0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!12 = metadata !{i32 655406, i32 0, metadata !2, metadata !"significandParts", metadata !"significandParts", metadata !"_ZNK7APFloat16significandPartsEv", metadata !3, i32 11, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!14 = metadata !{metadata !15, metadata !10}
-!15 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 655382, metadata !0, metadata !"integerPart", metadata !3, i32 2, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ]
-!17 = metadata !{i32 655382, metadata !0, metadata !"uint64_t", metadata !3, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ]
-!18 = metadata !{i32 655396, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!19 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !20, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!20 = metadata !{metadata !21, metadata !10, metadata !22}
-!21 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!22 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_reference_type ]
-!23 = metadata !{i32 655406, i32 0, metadata !0, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 23, metadata !8, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%class.APFloat*)* @_ZNK7APFloat9partCountEv, null, metadata !7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 655406, i32 0, metadata !0, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 28, metadata !19, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (%class.APFloat*, %class.APFloat*)* @_ZNK7APFloat14bitwiseIsEqualERKS_, null, metadata !1} ; [ DW_TAG_subprogram ]
-!25 = metadata !{i32 655406, i32 0, metadata !3, metadata !"partCountForBits", metadata !"partCountForBits", metadata !"", metadata !3, i32 17, metadata !26, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !27, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!27 = metadata !{metadata !6}
-!28 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!29 = metadata !{i32 655617, metadata !24, metadata !"this", metadata !3, i32 16777244, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!30 = metadata !{i32 655617, metadata !24, metadata !"rhs", metadata !3, i32 33554460, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 655616, metadata !32, metadata !"i", metadata !3, i32 29, metadata !33, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!32 = metadata !{i32 655371, metadata !24, i32 28, i32 56, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
-!33 = metadata !{i32 655396, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!34 = metadata !{i32 655616, metadata !32, metadata !"p", metadata !3, i32 30, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!35 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ]
-!36 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_const_type ]
-!37 = metadata !{i32 655616, metadata !32, metadata !"q", metadata !3, i32 31, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 655412, i32 0, metadata !3, metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !3, i32 3, metadata !40, i32 1, i32 1, i32 42} ; [ DW_TAG_variable ]
-!40 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_const_type ]
-!41 = metadata !{i32 22, i32 23, metadata !23, null}
-!42 = metadata !{i32 24, i32 10, metadata !43, null}
-!43 = metadata !{i32 655371, metadata !23, i32 23, i32 1, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
-!44 = metadata !{metadata !"int", metadata !45}
-!45 = metadata !{metadata !"omnipotent char", metadata !46}
-!46 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!47 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !42} ; [ DW_TAG_arg_variable ]
-!48 = metadata !{i32 16, i32 58, metadata !25, metadata !42}
-!49 = metadata !{i32 18, i32 3, metadata !50, metadata !42}
-!50 = metadata !{i32 655371, metadata !25, i32 17, i32 1, metadata !3, i32 4} ; [ DW_TAG_lexical_block ]
-!51 = metadata !{i32 28, i32 15, metadata !24, null}
-!52 = metadata !{i32 28, i32 45, metadata !24, null}
-!53 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, metadata !54} ; [ DW_TAG_arg_variable ]
-!54 = metadata !{i32 29, i32 10, metadata !32, null}
-!55 = metadata !{i32 22, i32 23, metadata !23, metadata !54}
-!56 = metadata !{i32 24, i32 10, metadata !43, metadata !54}
-!57 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !56} ; [ DW_TAG_arg_variable ]
-!58 = metadata !{i32 16, i32 58, metadata !25, metadata !56}
-!59 = metadata !{i32 18, i32 3, metadata !50, metadata !56}
-!60 = metadata !{i32 30, i32 24, metadata !32, null}
-!61 = metadata !{i32 31, i32 24, metadata !32, null}
-!62 = metadata !{i32 32, i32 3, metadata !32, null}
-!63 = metadata !{i32 33, i32 5, metadata !64, null}
-!64 = metadata !{i32 655371, metadata !65, i32 32, i32 25, metadata !3, i32 3} ; [ DW_TAG_lexical_block ]
-!65 = metadata !{i32 655371, metadata !32, i32 32, i32 3, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
-!66 = metadata !{metadata !"long long", metadata !45}
-!67 = metadata !{i32 32, i32 15, metadata !65, null}
-!68 = metadata !{i32 37, i32 1, metadata !32, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index afe1729..c35935f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp7
+;CHECK-NEXT: Ltmp5
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
new file mode 100644
index 0000000..788910c
--- /dev/null
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -0,0 +1,37 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; Radar 10464995
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@s = common global [4294967296 x i8] zeroinitializer, align 16
+;CHECK: .long	4294967295
+
+define void @bar() nounwind uwtable ssp {
+entry:
+  store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18
+  ret void, !dbg !20
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ]
+!18 = metadata !{i32 5, i32 3, metadata !19, null}
+!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
index 481c4ba..d248a41 100644
--- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -8,7 +8,7 @@
 ;CHECK-NEXT: DW_AT_call_file
 ;CHECK-NEXT: DW_AT_call_line
 ;CHECK-NEXT: DW_TAG_formal_parameter
-;CHECK-NEXT: .ascii   "sp"                   ## DW_AT_name
+;CHECK-NEXT: Lstring11-Lsection_str ## DW_AT_name
 
 %struct.S1 = type { float*, i32 }
 
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index a0e4d16..05e29ec 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 ;Radar 8950491
 
-;CHECK:        .ascii   "var"                  ## DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset5
 ;CHECK-NEXT:        ## DW_AT_decl_file
 ;CHECK-NEXT:        ## DW_AT_decl_line
 ;CHECK-NEXT:        ## DW_AT_type
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 87c1be5..e577ecb 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
new file mode 100644
index 0000000..c64752c
--- /dev/null
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+
+; The important part of the following check is that dir = #0.
+;                        Dir  Mod Time   File Len   File Name
+;                        ---- ---------- ---------- ---------------------------
+; CHECK: file_names[  1]    0 0x00000000 0x00000000 empty.c
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 874c53a..ac5174d 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,14 +6,11 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_startproc
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: pushq %rbp
@@ -25,5 +22,4 @@ entry:
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-FP-NEXT: nop
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 52dcb61..0f16a64 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
+; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl	%ebp}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
new file mode 100644
index 0000000..2135f94
--- /dev/null
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s
+
+define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
+
+
+define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e151821..e4982f0 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {add	ESP, 8}
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 4abc3b5..8ac15cd 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
 ; PR4684
 
 target datalayout =
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 91d1f5d..f0375f8 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -82,9 +82,8 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
   ret i64 %v11
 ; X64: test5:
 ; X64: movslq	%e[[A1]], %rax
-; X64-NEXT: movq	(%r[[A0]],%rax), %rax
-; X64-NEXT: addq	%{{rdx|r8}}, %rax
-; X64-NEXT: ret
+; X64-NEXT: (%r[[A0]],%rax),
+; X64: ret
 }
 
 ; PR9500, rdar://9156159 - Don't do non-local address mode folding,
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 6a5a102..d8f4663 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -82,7 +82,7 @@ entry:
   ret i64 %mul
 
 ; CHECK: test6:
-; CHECK: leaq	(,%rdi,8), %rax
+; CHECK: shlq	$3, %rdi
 }
 
 define i32 @test7(i32 %x) nounwind ssp {
@@ -90,7 +90,7 @@ entry:
   %mul = mul nsw i32 %x, 8
   ret i32 %mul
 ; CHECK: test7:
-; CHECK: leal	(,%rdi,8), %eax
+; CHECK: shll	$3, %edi
 }
 
 
@@ -225,18 +225,20 @@ if.else:                                          ; preds = %entry
 ; CHECK-NEXT: je 
 }
 
-; Check that 0.0 is materialized using pxor
+; Check that 0.0 is materialized using xorps
 define void @test18(float* %p1) {
   store float 0.0, float* %p1
   ret void
 ; CHECK: test18:
-; CHECK: pxor
+; CHECK: xorps
 }
+
+; Without any type hints, doubles use the smaller xorps instead of xorpd.
 define void @test19(double* %p1) {
   store double 0.0, double* %p1
   ret void
 ; CHECK: test19:
-; CHECK: pxor
+; CHECK: xorps
 }
 
 ; Check that we fast-isel sret
@@ -252,12 +254,12 @@ entry:
 }
 declare void @test20sret(%struct.a* sret)
 
-; Check that -0.0 is not materialized using pxor
+; Check that -0.0 is not materialized using xor
 define void @test21(double* %p1) {
   store double -0.0, double* %p1
   ret void
 ; CHECK: test21:
-; CHECK-NOT: pxor
+; CHECK-NOT: xor
 ; CHECK: movsd	LCPI
 }
 
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19972f7..b9598bb 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
 ; CHECK: test0:
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 8391860..c88d529 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -99,7 +99,6 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
   ret void
 }
 
-
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
@@ -108,3 +107,13 @@ define void @crash_test1() nounwind ssp {
   ret void
 }
 
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+define i64* @life() nounwind {
+  %a1 = alloca i64*, align 8
+  %a2 = bitcast i64** %a1 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind      
+  %a3 = load i64** %a1, align 8
+  ret i64* %a3
+}
+
diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll
new file mode 100644
index 0000000..0749682
--- /dev/null
+++ b/test/CodeGen/X86/fdiv.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck %s
+
+define double @exact(double %x) {
+; Exact division by a constant converted to multiplication.
+; CHECK: @exact
+; CHECK: mulsd
+  %div = fdiv double %x, 2.0
+  ret double %div
+}
+
+define double @inexact(double %x) {
+; Inexact division by a constant converted to multiplication.
+; CHECK: @inexact
+; CHECK: mulsd
+  %div = fdiv double %x, 0x41DFFFFFFFC00000 
+  ret double %div
+}
+
+define double @funky(double %x) {
+; No conversion to multiplication if too funky.
+; CHECK: @funky
+; CHECK: divsd
+  %div = fdiv double %x, 0.0
+  ret double %div
+}
+
+define double @denormal1(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal1
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FD0000000000001
+  ret double %div
+}
+
+define double @denormal2(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
+  ret double %div
+}
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 2ffcb96..81511a3 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -4,6 +4,8 @@
 
 ; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
 ; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
 
 @.str = private constant [4 x i8] c"%f\0A\00"
 
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
new file mode 100644
index 0000000..cfe484a
--- /dev/null
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -0,0 +1,19 @@
+; The purpose of this test to to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows. And that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @foo(i32 (i8*, ...)* %f) nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+  ret i32 0
+}
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
new file mode 100644
index 0000000..5ed03ef
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+
+; VFMADD
+define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+  ; CHECK: vfmaddps (%{{.*}})
+  %x = load <4 x float>* %a2
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+  ; CHECK: vfmaddpd (%{{.*}})
+  %x = load <2 x double>* %a2
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUB
+define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubss
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMADD
+define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmaddss
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfnmaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfnmaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMSUB
+define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmsubss
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmsubsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfnmsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfnmsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMADDSUB
+define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmaddsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmaddsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUBADD
+define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmsubaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmsubaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f77..93baa0e 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
-	%tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
-	%tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %tmp2 = shl i32 %i, 2
+  %tmp4 = and i32 %tmp2, 1020
+  %tmp7 = getelementptr i8* %X, i32 %tmp4
+  %tmp78 = bitcast i8* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
 }
 
 define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %tmp2 = shl i32 %i, 1
+  %tmp4 = and i32 %tmp2, 131070
+  %tmp7 = getelementptr i16* %X, i32 %tmp4
+  %tmp78 = bitcast i16* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce a lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and their reuse in
+; the function, the DAG can get confusing trying to re-use both of them and
+; prevent easy analysis of the mask in order to match this.
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+  %val = load i32* %val.ptr
+  %sum = add i32 %val, %i.zext
+  ret i32 %sum
+}
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercise slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
-	%tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
-	%tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %sum.1 = add i32 %val, %i.zext
+  %sum.2 = add i32 %sum.1, %index
+  ret i32 %sum.2
 }
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 5525af2..e03cb7e 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
 @stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 647bbdb..1d315ff 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
+; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+
+; i386 test has been disabled when scheduler 2-addr hack is disabled.
 
 ; This testcase shouldn't need to spill the -1 value,
 ; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9f8d990..9cf4607 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,15 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s
 
 ; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
 ; should use a constant-pool load instead.
+;
+; RAGreedy defeats the test by splitting live ranges.
 
 ; Constant pool all-ones vector:
-; CHECK: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
+; CHECK: .space 16,255
 
 ; No pcmpeqd instructions, everybody uses the constant pool.
 ; CHECK: program_1:
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index b9cb5d7..df90254 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -10,7 +10,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
 ; Pass arguments on the stack.
 ; CHECK-NEXT: movq %rsp, [[RCX:%r..]]
 ; Copy constant-pool value.
-; CHECK-NEXT: fldt LCPI
+; CHECK-NEXT: fldl LCPI
 ; CHECK-NEXT: fstpt 16([[RCX]])
 ; Copy x1 return value.
 ; CHECK-NEXT: fstpt ([[RCX]])
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index f220b24..3e26141 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -10,7 +10,7 @@ entry:
 	%tmp13 = tail call double @foo()
 	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
 	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
-	volatile store double %tmp3940, double* %b
+	store volatile double %tmp3940, double* %b
 	ret void
 }
 
diff --git a/test/CodeGen/X86/fsgsbase.ll b/test/CodeGen/X86/fsgsbase.ll
new file mode 100644
index 0000000..0c22e3c
--- /dev/null
+++ b/test/CodeGen/X86/fsgsbase.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=core-avx-i -mattr=fsgsbase | FileCheck %s
+
+define i32 @test_x86_rdfsbase_32() {
+  ; CHECK: rdfsbasel
+  %res = call i32 @llvm.x86.rdfsbase.32()
+  ret i32 %res
+}
+declare i32 @llvm.x86.rdfsbase.32() nounwind readnone
+
+define i32 @test_x86_rdgsbase_32() {
+  ; CHECK: rdgsbasel
+  %res = call i32 @llvm.x86.rdgsbase.32()
+  ret i32 %res
+}
+declare i32 @llvm.x86.rdgsbase.32() nounwind readnone
+
+define i64 @test_x86_rdfsbase_64() {
+  ; CHECK: rdfsbaseq
+  %res = call i64 @llvm.x86.rdfsbase.64()
+  ret i64 %res
+}
+declare i64 @llvm.x86.rdfsbase.64() nounwind readnone
+
+define i64 @test_x86_rdgsbase_64() {
+  ; CHECK: rdgsbaseq
+  %res = call i64 @llvm.x86.rdgsbase.64()
+  ret i64 %res
+}
+declare i64 @llvm.x86.rdgsbase.64() nounwind readnone
+
+define void @test_x86_wrfsbase_32(i32 %x) {
+  ; CHECK: wrfsbasel
+  call void @llvm.x86.wrfsbase.32(i32 %x)
+  ret void
+}
+declare void @llvm.x86.wrfsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrgsbase_32(i32 %x) {
+  ; CHECK: wrgsbasel
+  call void @llvm.x86.wrgsbase.32(i32 %x)
+  ret void
+}
+declare void @llvm.x86.wrgsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrfsbase_64(i64 %x) {
+  ; CHECK: wrfsbaseq
+  call void @llvm.x86.wrfsbase.64(i64 %x)
+  ret void
+}
+declare void @llvm.x86.wrfsbase.64(i64) nounwind readnone
+
+define void @test_x86_wrgsbase_64(i64 %x) {
+  ; CHECK: wrgsbaseq
+  call void @llvm.x86.wrgsbase.64(i64 %x)
+  ret void
+}
+declare void @llvm.x86.wrgsbase.64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
new file mode 100644
index 0000000..d89e9dc
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-apple-darwin %s -o - | FileCheck %s
+@_ZTIi = external constant i8*
+
+define i32 @main() uwtable optsize ssp {
+entry:
+  invoke void @_Z1fv() optsize
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  br label %eh.resume
+
+try.cont:
+  ret i32 0
+
+eh.resume:
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z1fv() optsize
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: Leh_func_end0:
+; CHECK: GCC_except_table0
+; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 91758ea..5f1f4fd 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -192,3 +192,94 @@ define <4 x float> @hsubps4(<4 x float> %x) {
   %r = fsub <4 x float> %a, %b
   ret <4 x float> %r
 }
+
+; SSE3: vhaddps1:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps1:
+; AVX: vhaddps
+define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddps2:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps2:
+; AVX: vhaddps
+define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+  %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddps3:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps3:
+; AVX: vhaddps
+define <8 x float> @vhaddps3(<8 x float> %x) {
+  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps1:
+; AVX: vhsubps
+define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = fsub <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps3:
+; AVX: vhsubps
+define <8 x float> @vhsubps3(<8 x float> %x) {
+  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = fsub <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; SSE3: haddpd
+; AVX: vhaddpd1:
+; AVX: vhaddpd
+define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
+  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %r = fadd <4 x double> %a, %b
+  ret <4 x double> %r
+}
+
+; SSE3: vhsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; SSE3: hsubpd
+; AVX: vhsubpd1:
+; AVX: vhsubpd
+define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
+  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %r = fsub <4 x double> %a, %b
+  ret <4 x double> %r
+}
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
new file mode 100644
index 0000000..4289fa7
--- /dev/null
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -stats -O2 |& grep "1 machine-licm"
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
+  %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
+  %inc = add i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/i128-sdiv.ll b/test/CodeGen/X86/i128-sdiv.ll
new file mode 100644
index 0000000..ab5cdda
--- /dev/null
+++ b/test/CodeGen/X86/i128-sdiv.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Make sure none of these crash, and that the power-of-two transformations
+; trigger correctly.
+
+define i128 @test1(i128 %x) {
+  ; CHECK: test1:
+  ; CHECK-NOT: call
+  %tmp = sdiv i128 %x, 73786976294838206464
+  ret i128 %tmp
+}
+
+define i128 @test2(i128 %x) {
+  ; CHECK: test2:
+  ; CHECK-NOT: call
+  %tmp = sdiv i128 %x, -73786976294838206464
+  ret i128 %tmp
+}
+
+define i128 @test3(i128 %x) {
+  ; CHECK: test3:
+  ; CHECK: call
+  %tmp = sdiv i128 %x, -73786976294838206467
+  ret i128 %tmp
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index c9a1c1c..2249618 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s
 
 ; There should be no stack manipulations between the inline asm and ret.
 ; CHECK: test1
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 1c8e2f9..fca68ba 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+avx
 ; rdar://7066579
 
 	%0 = type { i64, i64, i64, i64, i64 }		; type %0
@@ -20,3 +20,18 @@ define void @test3(double %tmp) nounwind {
   call void asm sideeffect "$0", "q"(double %tmp) nounwind
   ret void
 }
+
+; rdar://10392864
+define void @test4(i8 signext %val, i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d) nounwind {
+entry:
+  %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
+  ret void
+}
+
+; rdar://10614894
+define <8 x float> @test5(<8 x float> %a, <8 x float> %b) nounwind {
+entry:
+  %0 = tail call <8 x float> asm "vperm2f128 $3, $2, $1, $0", "=x,x,x,i,~{dirflag},~{fpsr},~{flags}"(<8 x float> %a, <8 x float> %b, i32 16) nounwind
+  ret <8 x float> %0
+}
+
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 79b6885..91576fb 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl	%edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic | FileCheck %s
 ; rdar://6992609
 
+; CHECK: movl [[EDX:%e..]], 4(%esp)
+; CHECK: movl [[EDX]], 4(%esp)
 target triple = "i386-apple-darwin9.0"
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
 
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
deleted file mode 100644
index 8f79fb8..0000000
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ /dev/null
@@ -1,300 +0,0 @@
-; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
-; RUN: not grep inc %t
-; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
-; RUN: not grep addb %t
-; RUN: not grep leaq %t
-; RUN: not grep leal %t
-; RUN: not grep movq %t
-
-; IV users in each of the loops from other loops shouldn't cause LSR
-; to insert new induction variables. Previously it would create a
-; flood of new induction variables.
-; Also, the loop reversal should kick in once.
-;
-; In this example, performing LSR on the entire loop nest,
-; as opposed to only the inner loop can further reduce induction variables,
-; and their related instructions and registers.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
-entry:
-      %0 = xor i32 %IA, 1		; <i32> [#uses=1]
-      %1 = xor i32 %IB, 1		; <i32> [#uses=1]
-      %2 = or i32 %1, %0		; <i32> [#uses=1]
-      %3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
-      br i1 %3, label %bb2, label %bb13
-
-bb:		; preds = %bb3
-      %4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
-      %5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
-      %6 = fmul float %4, %5		; <float> [#uses=1]
-      %7 = fadd float %6, %Sum0.0		; <float> [#uses=1]
-      %indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
-      br label %bb2
-
-bb2:		; preds = %entry, %bb
-      %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
-      %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
-      %indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
-      %N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
-      %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
-      br i1 %8, label %bb3, label %bb4
-
-bb3:		; preds = %bb2
-      %9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
-      %10 = and i64 %9, 15		; <i64> [#uses=1]
-      %11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
-      br i1 %11, label %bb4, label %bb
-
-bb4:		; preds = %bb3, %bb2
-      %12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
-      %13 = and i64 %12, 15		; <i64> [#uses=1]
-      %14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
-      %15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
-      br i1 %14, label %bb6.preheader, label %bb10.preheader
-
-bb10.preheader:		; preds = %bb4
-      br i1 %15, label %bb9, label %bb12.loopexit
-
-bb6.preheader:		; preds = %bb4
-      br i1 %15, label %bb5, label %bb8.loopexit
-
-bb5:		; preds = %bb5, %bb6.preheader
-      %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
-      %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=1]
-	%indvar145 = trunc i64 %indvar143 to i32		; <i32> [#uses=1]
-	%tmp150 = mul i32 %indvar145, -16		; <i32> [#uses=1]
-	%N_addr.268 = add i32 %tmp150, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.273.rec = shl i64 %indvar143, 4		; <i64> [#uses=5]
-	%B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec		; <i64> [#uses=2]
-	%B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	%A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind
-	%16 = bitcast float* %A_addr.273 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%17 = load <4 x float>* %16, align 16		; <<4 x float>> [#uses=1]
-	%18 = bitcast float* %B_addr.271 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%19 = load <4 x float>* %18, align 16		; <<4 x float>> [#uses=1]
-	%20 = fmul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
-	%21 = fadd <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4		; <i64> [#uses=1]
-	%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163		; <i64> [#uses=2]
-	%22 = getelementptr float* %A, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%23 = bitcast float* %22 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%24 = load <4 x float>* %23, align 16		; <<4 x float>> [#uses=1]
-	%25 = getelementptr float* %B, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%26 = bitcast float* %25 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%27 = load <4 x float>* %26, align 16		; <<4 x float>> [#uses=1]
-	%28 = fmul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
-	%29 = fadd <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8		; <i64> [#uses=1]
-	%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161		; <i64> [#uses=2]
-	%30 = getelementptr float* %A, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%31 = bitcast float* %30 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%32 = load <4 x float>* %31, align 16		; <<4 x float>> [#uses=1]
-	%33 = getelementptr float* %B, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%34 = bitcast float* %33 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%35 = load <4 x float>* %34, align 16		; <<4 x float>> [#uses=1]
-	%36 = fmul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
-	%37 = fadd <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12		; <i64> [#uses=1]
-	%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159		; <i64> [#uses=2]
-	%38 = getelementptr float* %A, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%39 = bitcast float* %38 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%40 = load <4 x float>* %39, align 16		; <<4 x float>> [#uses=1]
-	%41 = getelementptr float* %B, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%42 = bitcast float* %41 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%43 = load <4 x float>* %42, align 16		; <<4 x float>> [#uses=1]
-	%44 = fmul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
-	%45 = fadd <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
-	%.rec83 = add i64 %A_addr.273.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83		; <i64> [#uses=2]
-	%46 = getelementptr float* %A, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%47 = getelementptr float* %B, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%48 = add i32 %N_addr.268, -16		; <i32> [#uses=2]
-	%49 = icmp sgt i32 %48, 15		; <i1> [#uses=1]
-	%indvar.next144 = add i64 %indvar143, 1		; <i64> [#uses=1]
-	br i1 %49, label %bb5, label %bb8.loopexit
-
-bb7:		; preds = %bb7, %bb8.loopexit
-	%indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ]		; <i64> [#uses=3]
-	%vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ]		; <<4 x float>> [#uses=1]
-	%indvar132 = trunc i64 %indvar130 to i32		; <i32> [#uses=1]
-	%tmp133 = mul i32 %indvar132, -4		; <i32> [#uses=1]
-	%N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa		; <i32> [#uses=1]
-	%A_addr.361.rec = shl i64 %indvar130, 2		; <i64> [#uses=3]
-	%B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%50 = bitcast float* %A_addr.361 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%51 = load <4 x float>* %50, align 16		; <<4 x float>> [#uses=1]
-	%52 = bitcast float* %B_addr.359 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%53 = load <4 x float>* %52, align 16		; <<4 x float>> [#uses=1]
-	%54 = fmul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
-	%55 = fadd <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
-	%.rec85 = add i64 %A_addr.361.rec, 4		; <i64> [#uses=2]
-	%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%58 = add i32 %N_addr.358, -4		; <i32> [#uses=2]
-	%59 = icmp sgt i32 %58, 3		; <i1> [#uses=1]
-	%indvar.next131 = add i64 %indvar130, 1		; <i64> [#uses=1]
-	br i1 %59, label %bb7, label %bb13
-
-bb8.loopexit:		; preds = %bb5, %bb6.preheader
-	%A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ]		; <float*> [#uses=3]
-	%vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=2]
-	%B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ]		; <float*> [#uses=3]
-	%vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=2]
-	%vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=2]
-	%N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ]		; <i32> [#uses=3]
-	%vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=2]
-	%60 = icmp sgt i32 %N_addr.2.lcssa, 3		; <i1> [#uses=1]
-	br i1 %60, label %bb7, label %bb13
-
-bb9:		; preds = %bb9, %bb10.preheader
-	%indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ]		; <i64> [#uses=3]
-	%vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=1]
-	%indvar108 = trunc i64 %indvar106 to i32		; <i32> [#uses=1]
-	%tmp113 = mul i32 %indvar108, -16		; <i32> [#uses=1]
-	%N_addr.435 = add i32 %tmp113, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.440.rec = shl i64 %indvar106, 4		; <i64> [#uses=5]
-	%B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec		; <i64> [#uses=2]
-	%B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%61 = bitcast float* %B_addr.438 to <4 x float>*		; <i8*> [#uses=1]
-	%62 = load <4 x float>* %61, align 1
-	%B_addr.438.sum169 = or i64 %A_addr.440.rec, 4		; <i64> [#uses=1]
-	%B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169		; <i64> [#uses=2]
-	%63 = getelementptr float* %B, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%64 = bitcast float* %63 to <4 x float>*		; <i8*> [#uses=1]
-	%65 = load <4 x float>* %64, align 1
-	%B_addr.438.sum168 = or i64 %A_addr.440.rec, 8		; <i64> [#uses=1]
-	%B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168		; <i64> [#uses=2]
-	%66 = getelementptr float* %B, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%67 = bitcast float* %66 to <4 x float>*		; <i8*> [#uses=1]
-	%68 = load <4 x float>* %67, align 1
-	%B_addr.438.sum167 = or i64 %A_addr.440.rec, 12		; <i64> [#uses=1]
-	%B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167		; <i64> [#uses=2]
-	%69 = getelementptr float* %B, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%70 = bitcast float* %69 to <4 x float>*		; <i8*> [#uses=1]
-	%71 = load <4 x float>* %70, align 1
-	%72 = bitcast float* %A_addr.440 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%73 = load <4 x float>* %72, align 16		; <<4 x float>> [#uses=1]
-	%74 = fmul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
-	%75 = fadd <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
-	%76 = getelementptr float* %A, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%77 = bitcast float* %76 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%78 = load <4 x float>* %77, align 16		; <<4 x float>> [#uses=1]
-	%79 = fmul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
-	%80 = fadd <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
-	%81 = getelementptr float* %A, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%82 = bitcast float* %81 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%83 = load <4 x float>* %82, align 16		; <<4 x float>> [#uses=1]
-	%84 = fmul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
-	%85 = fadd <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
-	%86 = getelementptr float* %A, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%87 = bitcast float* %86 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%88 = load <4 x float>* %87, align 16		; <<4 x float>> [#uses=1]
-	%89 = fmul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
-	%90 = fadd <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
-	%.rec89 = add i64 %A_addr.440.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89		; <i64> [#uses=2]
-	%91 = getelementptr float* %A, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%92 = getelementptr float* %B, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%93 = add i32 %N_addr.435, -16		; <i32> [#uses=2]
-	%94 = icmp sgt i32 %93, 15		; <i1> [#uses=1]
-	%indvar.next107 = add i64 %indvar106, 1		; <i64> [#uses=1]
-	br i1 %94, label %bb9, label %bb12.loopexit
-
-bb11:		; preds = %bb11, %bb12.loopexit
-	%indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ]		; <i64> [#uses=3]
-	%vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%indvar96 = trunc i64 %indvar to i32		; <i32> [#uses=1]
-	%tmp = mul i32 %indvar96, -4		; <i32> [#uses=1]
-	%N_addr.526 = add i32 %tmp, %N_addr.4.lcssa		; <i32> [#uses=1]
-	%A_addr.529.rec = shl i64 %indvar, 2		; <i64> [#uses=3]
-	%B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%95 = bitcast float* %B_addr.527 to <4 x float>*		; <i8*> [#uses=1]
-	%96 = load <4 x float>* %95, align 1
-	%97 = bitcast float* %A_addr.529 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%98 = load <4 x float>* %97, align 16		; <<4 x float>> [#uses=1]
-	%99 = fmul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
-	%100 = fadd <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
-	%.rec91 = add i64 %A_addr.529.rec, 4		; <i64> [#uses=2]
-	%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%103 = add i32 %N_addr.526, -4		; <i32> [#uses=2]
-	%104 = icmp sgt i32 %103, 3		; <i1> [#uses=1]
-	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
-	br i1 %104, label %bb11, label %bb13
-
-bb12.loopexit:		; preds = %bb9, %bb10.preheader
-	%A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ]		; <float*> [#uses=3]
-	%vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=2]
-	%B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ]		; <float*> [#uses=3]
-	%vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=2]
-	%vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=2]
-	%N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ]		; <i32> [#uses=3]
-	%vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=2]
-	%105 = icmp sgt i32 %N_addr.4.lcssa, 3		; <i1> [#uses=1]
-	br i1 %105, label %bb11, label %bb13
-
-bb13:		; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
-	%Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ]		; <float> [#uses=1]
-	%vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ]		; <i32> [#uses=2]
-	%vSum2.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ]		; <float*> [#uses=1]
-	%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ]		; <float*> [#uses=1]
-	%106 = fadd <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
-	%107 = fadd <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
-	%108 = fadd <4 x float> %106, %107		; <<4 x float>> [#uses=4]
-	%tmp23 = extractelement <4 x float> %108, i32 0		; <float> [#uses=1]
-	%tmp21 = extractelement <4 x float> %108, i32 1		; <float> [#uses=1]
-	%109 = fadd float %tmp23, %tmp21		; <float> [#uses=1]
-	%tmp19 = extractelement <4 x float> %108, i32 2		; <float> [#uses=1]
-	%tmp17 = extractelement <4 x float> %108, i32 3		; <float> [#uses=1]
-	%110 = fadd float %tmp19, %tmp17		; <float> [#uses=1]
-	%111 = fadd float %109, %110		; <float> [#uses=1]
-	%Sum0.254 = fadd float %111, %Sum0.1		; <float> [#uses=2]
-	%112 = icmp sgt i32 %N_addr.1, 0		; <i1> [#uses=1]
-	br i1 %112, label %bb.nph56, label %bb16
-
-bb.nph56:		; preds = %bb13
-	%tmp. = zext i32 %N_addr.1 to i64		; <i64> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb14, %bb.nph56
-	%indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ]		; <i64> [#uses=3]
-	%Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	%tmp.122 = sext i32 %IB to i64		; <i64> [#uses=1]
-	%B_addr.652.rec = mul i64 %indvar117, %tmp.122		; <i64> [#uses=1]
-	%tmp.124 = sext i32 %IA to i64		; <i64> [#uses=1]
-	%A_addr.653.rec = mul i64 %indvar117, %tmp.124		; <i64> [#uses=1]
-	%B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec		; <float*> [#uses=1]
-	%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec		; <float*> [#uses=1]
-	%113 = load float* %A_addr.653, align 4		; <float> [#uses=1]
-	%114 = load float* %B_addr.652, align 4		; <float> [#uses=1]
-	%115 = fmul float %113, %114		; <float> [#uses=1]
-	%Sum0.2 = fadd float %115, %Sum0.255		; <float> [#uses=2]
-	%indvar.next118 = add i64 %indvar117, 1		; <i64> [#uses=2]
-	%exitcond = icmp eq i64 %indvar.next118, %tmp.		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb16, label %bb14
-
-bb16:		; preds = %bb14, %bb13
-	%Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	store float %Sum0.2.lcssa, float* %C, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 5e8e162..dbd133c 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 | grep jns
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
 
 define i32 @f(i32 %X) {
 entry:
+; CHECK: f:
+; CHECK: jns
 	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
 	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp, label %cond_true, label %cond_next
@@ -18,3 +20,15 @@ cond_next:		; preds = %cond_true, %entry
 declare i32 @bar(...)
 
 declare i32 @baz(...)
+
+; rdar://10633221
+define i32 @g(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: g:
+; CHECK-NOT: test
+; CHECK: cmovs
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, 0
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
diff --git a/test/CodeGen/X86/legalize-libcalls.ll b/test/CodeGen/X86/legalize-libcalls.ll
new file mode 100644
index 0000000..879dc98
--- /dev/null
+++ b/test/CodeGen/X86/legalize-libcalls.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86-64 < %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+define float @MakeSphere(float %theta.079) nounwind {
+entry:
+  %add36 = fadd float %theta.079, undef
+  %call = call float @cosf(float %theta.079) nounwind readnone
+  %call45 = call float @sinf(float %theta.079) nounwind readnone
+  %call37 = call float @sinf(float %add36) nounwind readnone
+  store float %call, float* undef, align 8
+  store float %call37, float* undef, align 8
+  store float %call45, float* undef, align 8
+  ret float %add36
+}
+
+define hidden fastcc void @unroll_loop(i64 %storemerge32129) nounwind {
+entry:
+  call fastcc void @copy_rtx() nounwind
+  call fastcc void @copy_rtx() nounwind
+  %tmp225 = alloca i8, i64 %storemerge32129, align 8 ; [#uses=0 type=i8*]
+  %cmp651201 = icmp slt i64 %storemerge32129, 0   ; [#uses=1 type=i1]
+  br i1 %cmp651201, label %for.body653.lr.ph, label %if.end638.for.end659_crit_edge
+
+for.body653.lr.ph:                                ; preds = %entry
+  unreachable
+
+if.end638.for.end659_crit_edge:                   ; preds = %entry
+  unreachable
+}
+
+declare float @cosf(float) nounwind readnone
+declare float @sinf(float) nounwind readnone
+declare hidden fastcc void @copy_rtx() nounwind
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
new file mode 100644
index 0000000..c9f2fc2
--- /dev/null
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
+
+define i64 @test1(i32 %xx, i32 %test) nounwind {
+  %conv = zext i32 %xx to i64
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %conv, %sh_prom
+  ret i64 %shl
+; CHECK: test1:
+; CHECK: shll	%cl, %eax
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test2(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %xx, %sh_prom
+  ret i64 %shl
+; CHECK: test2:
+; CHECK: shll	%cl, %esi
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+; CHECK: orl	%esi, %edx
+; CHECK: shll	%cl, %eax
+}
+
+define i64 @test3(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = lshr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test3:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test4(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = ashr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test4:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: sarl	%cl, %edx
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
new file mode 100644
index 0000000..a8ad0f1
--- /dev/null
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/log2_not_readnone.ll b/test/CodeGen/X86/log2_not_readnone.ll
new file mode 100644
index 0000000..5620835
--- /dev/null
+++ b/test/CodeGen/X86/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i386-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: calll log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: calll exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
deleted file mode 100644
index d6c265f..0000000
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1
-
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
-entry:
-	%tmp2955 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
-	br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
-
-bb26.outer.us:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=2]
-	%k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=1]
-	%tmp3.us = mul i32 %i.044.0.ph.us, 6		; <i32> [#uses=1]
-	br label %bb1.us
-
-bb1.us:		; preds = %bb1.us, %bb26.outer.us
-	%j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ]		; <i32> [#uses=2]
-	%k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ]		; <i32> [#uses=1]
-	%tmp5.us = add i32 %tmp3.us, %j.053.us		; <i32> [#uses=1]
-	%tmp7.us = shl i32 %D, %tmp5.us		; <i32> [#uses=2]
-	%tmp9.us = icmp eq i32 %tmp7.us, %B		; <i1> [#uses=1]
-	%tmp910.us = zext i1 %tmp9.us to i32		; <i32> [#uses=1]
-	%tmp12.us = and i32 %tmp7.us, %A		; <i32> [#uses=1]
-	%tmp19.us = and i32 %tmp12.us, %tmp910.us		; <i32> [#uses=1]
-	%k.0.us = add i32 %tmp19.us, %k.154.us		; <i32> [#uses=3]
-	%tmp25.us = add i32 %j.053.us, 1		; <i32> [#uses=2]
-	%tmp29.us = icmp slt i32 %tmp25.us, %C		; <i1> [#uses=1]
-	br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us
-
-bb26.bb32_crit_edge.us:		; preds = %bb1.us
-	%indvar.next57 = add i32 %i.044.0.ph.us, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next57, 40		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb40.split, label %bb26.outer.us
-
-bb40.split:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ]		; <i32> [#uses=1]
-	ret i32 %k.1.lcssa.lcssa.us-lcssa
-}
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index b07eeb6..d50a668 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -11,9 +11,9 @@ entry:
 bb:		; preds = %bb, %entry
 	%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%tmp1 = trunc i32 %i.014.0 to i16		; <i16> [#uses=2]
-	volatile store i16 %tmp1, i16* @X, align 2
+	store volatile i16 %tmp1, i16* @X, align 2
 	%tmp34 = shl i16 %tmp1, 2		; <i16> [#uses=1]
-	volatile store i16 %tmp34, i16* @Y, align 2
+	store volatile i16 %tmp34, i16* @Y, align 2
 	%indvar.next = add i32 %i.014.0, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
 	br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 938023f..ebda9f2 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,6 +1,8 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
 
+; CHECK: t:
 ; CHECK: decq
+; CHECK-NEXT: movl (
 ; CHECK-NEXT: jne
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
@@ -135,3 +137,44 @@ bb2:		; preds = %bb
 	store i8 %92, i8* %93, align 1
 	ret void
 }
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp eq i32 %i, 1
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %i to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+  %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, %b.05
+  %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+  %2 = trunc i64 %indvars.iv to i32
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll
index d0d2bbd..d825b5a 100644
--- a/test/CodeGen/X86/lsr-nonaffine.ll
+++ b/test/CodeGen/X86/lsr-nonaffine.ll
@@ -19,7 +19,7 @@ entry:
 
 loop:
   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  volatile store i64 %i, i64* %p
+  store volatile i64 %i, i64* %p
   %i.next = add i64 %i, %s
   %c = icmp slt i64 %i.next, %n
   br i1 %c, label %loop, label %exit
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 527a5a6..1311a73 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
 target datalayout = "e-p:64:64:64"
 target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 1f3b59a..b85ddeb 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -12,7 +12,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%1 = trunc i32 %i.03 to i16		; <i16> [#uses=1]
-	volatile store i16 %1, i16* @X, align 2
+	store volatile i16 %1, i16* @X, align 2
 	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
 	br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index e5a55ab..2faa24a 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,38 +1,62 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
-	ret i32 %tmp
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+	ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: lzcntl
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x )
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: lzcntw
 }
 
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+define i32 @t3(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+	ret i32 %tmp
+; CHECK: t3:
+; CHECK: lzcntl
+}
 
-define i64 @t3(i64 %x) nounwind  {
-	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x )
+define i64 @t4(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK: t3:
+; CHECK: t4:
 ; CHECK: lzcntq
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
-
-define i8 @t4(i8 %x) nounwind  {
-	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x )
+define i8 @t5(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK: t4:
+; CHECK: t5:
+; CHECK: lzcntl
+}
+
+define i16 @t6(i16 %x) nounwind  {
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+	ret i16 %tmp
+; CHECK: t6:
 ; CHECK: lzcntw
 }
 
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+	ret i32 %tmp
+; CHECK: t7:
+; CHECK: lzcntl
+}
 
+define i64 @t8(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+	ret i64 %tmp
+; CHECK: t8:
+; CHECK: lzcntq
+}
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 0000000..54fa01c
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind  {
+entry:
+; CHECK: t1:
+; CHECK: jne
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body:                                       ; preds = %entry, %while.body
+  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+  %rem = srem i32 %a.addr.03, %b.addr.02
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+  ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancels out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
+}
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8..a757cde 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us:                                     ; preds = %for.body53.us, %if.
 sw.bb307:                                         ; preds = %sw.bb, %entry
   ret void
 }
+
+; CSE physical register defining instruction across MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+  %cmp = icmp ugt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+  %cmp1 = icmp ult i32 %a, %b
+  %. = sext i1 %cmp1 to i32
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0b4d73a..a7b036e 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
-; RUN: grep add %t | count 2
+; RUN: grep add %t | count 1
 ; RUN: grep inc %t | count 4
 ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
new file mode 100644
index 0000000..41f96e8
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10 -mattr=avx -show-mc-encoding < %s | FileCheck %s
+
+define i64 @t1(double %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t1
+  %0 = bitcast double %d_ivar to i64
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+  ret i64 %0
+}
+
+define double @t2(i64 %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t2
+  %0 = bitcast i64 %d_ivar to double
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+  ret double %0
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f43b0bf..86c6862 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -79,3 +79,16 @@ entry:
 ; LINUX movq
 }
 
+
+@.str = private unnamed_addr constant [30 x i8] c"\00aaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+define void @test5(i8* nocapture %C) nounwind uwtable ssp {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+  ret void
+
+; DARWIN: movabsq	$7016996765293437281
+; DARWIN: movabsq	$7016996765293437184
+}
+
+
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
new file mode 100644
index 0000000..8f2f6f7
--- /dev/null
+++ b/test/CodeGen/X86/misched-new.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
+; REQUIRES: asserts
+;
+; Interesting MachineScheduler cases.
+;
+; FIXME: There should be an assert in the coalescer that we're not rematting
+; "not-quite-dead" copies, but that breaks a lot of tests <rdar://problem/11148682>.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; From oggenc.
+; After coalescing, we have a dead superreg (RAX) definition.
+;
+; CHECK: xorl %esi, %esi
+; CHECK: movl $32, %ecx
+; CHECK: rep;movsl
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.cond.preheader, label %if.end
+
+for.cond.preheader:                               ; preds = %entry
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index 3ac0e4e..8b7200d 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 6062b50..d9c7c67 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
+; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | grep pinsr
 ; PR2562
 
 external global i16		; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
deleted file mode 100644
index a7ce7d9..0000000
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
-
-	%struct.vS1024 = type { [8 x <4 x i32>] }
-	%struct.vS512 = type { [4 x <4 x i32>] }
-
-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
-
-define void @t() nounwind {
-entry:
-	br label %bb554
-
-bb554:		; preds = %bb554, %entry
-	%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ]		; <<1 x i64>> [#uses=1]
-	%0 = load x86_mmx* null, align 8		; <<1 x i64>> [#uses=2]
-	%1 = bitcast x86_mmx %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
-	%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
-	%2 = bitcast <2 x i32> %tmp555 to x86_mmx		; <<1 x i64>> [#uses=1]
-	%3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
-        store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
-        %tmp3 = bitcast x86_mmx %2 to <1 x i64>
-	%tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3		; <<1 x i64>> [#uses=1]
-        %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
-	%4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
-        %tmp6 = bitcast x86_mmx %4 to <1 x i64>
-        %tmp7 = bitcast x86_mmx %3 to <1 x i64>
-	%tmp562 = add <1 x i64> %tmp6, %tmp7		; <<1 x i64>> [#uses=1]
-	br label %bb554
-}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
deleted file mode 100644
index 191e261..0000000
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
-; There are no MMX operations here; this is promoted to XMM.
-
-define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
-entry:
-	%0 = load <1 x i64>* %a, align 8		; <<1 x i64>> [#uses=1]
-	%1 = bitcast <1 x i64> %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
-	%2 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
-	%3 = bitcast <2 x i32> %2 to <1 x i64>		; <<1 x i64>> [#uses=1]
-	store <1 x i64> %3, <1 x i64>* %b, align 8
-	br label %bb2
-
-bb2:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 2368548..928ad03 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -78,6 +78,22 @@ entry:
   ret i32 %shr.i
 }
 
+; PR11570
+define void @float_call_signbit(double %n) {
+entry:
+; FIXME: This should also use movmskps; we don't form the FGETSIGN node
+; in this case, though.
+; CHECK: float_call_signbit:
+; CHECK: movd %xmm0, %rdi
+; FIXME
+  %t0 = bitcast double %n to i64
+  %tobool.i.i.i.i = icmp slt i64 %t0, 0
+  tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
+  ret void
+}
+declare void @float_call_signbit_callee(i1 zeroext)
+
+
 ; rdar://10247336
 ; movmskp{s|d} only set low 4/2 bits, high bits are known zero
 
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 51a0611..4f7e28a 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,6 +1,10 @@
 ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
 ; rdar://7236213
 
+; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
+; The code isn't any worse though.
+; XFAIL: *
+
 ; CodeGen shouldn't require any lea instructions inside the marked loop.
 ; It should properly set up post-increment uses and do coalescing for
 ; the induction variables.
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 82b7331..8036710 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -52,8 +52,8 @@ bb:		; preds = %bb23
 	%tmp17 = ashr i64 %tmp16, %.cast		; <i64> [#uses=1]
 	%tmp1718 = trunc i64 %tmp17 to i32		; <i32> [#uses=1]
 	%tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp1718, i32* @var
-	volatile store i32 %tmp13, i32* @var
+	store volatile i32 %tmp1718, i32* @var
+	store volatile i32 %tmp13, i32* @var
 	%tmp21 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp22 = add i32 %tmp21, 1		; <i32> [#uses=1]
 	store i32 %tmp22, i32* %i, align 4
@@ -86,7 +86,7 @@ bb28:		; preds = %bb46
 	%tmp3940 = bitcast float* %tmp39 to i32*		; <i32*> [#uses=1]
 	%tmp41 = load i32* %tmp3940, align 4		; <i32> [#uses=1]
 	%tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp41, i32* @var
+	store volatile i32 %tmp41, i32* @var
 	%tmp44 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp45 = add i32 %tmp44, 1		; <i32> [#uses=1]
 	store i32 %tmp45, i32* %i, align 4
@@ -127,8 +127,8 @@ bb52:		; preds = %bb78
 	%tmp72 = ashr i64 %tmp70, %.cast71		; <i64> [#uses=1]
 	%tmp7273 = trunc i64 %tmp72 to i32		; <i32> [#uses=1]
 	%tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp7273, i32* @var
-	volatile store i32 %tmp66, i32* @var
+	store volatile i32 %tmp7273, i32* @var
+	store volatile i32 %tmp66, i32* @var
 	%tmp76 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp77 = add i32 %tmp76, 1		; <i32> [#uses=1]
 	store i32 %tmp77, i32* %i, align 4
@@ -161,7 +161,7 @@ bb84:		; preds = %bb101
 	%tmp9495 = bitcast float* %tmp94 to i32*		; <i32*> [#uses=1]
 	%tmp96 = load i32* %tmp9495, align 4		; <i32> [#uses=1]
 	%tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp96, i32* @var
+	store volatile i32 %tmp96, i32* @var
 	%tmp99 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp100 = add i32 %tmp99, 1		; <i32> [#uses=1]
 	store i32 %tmp100, i32* %i, align 4
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index ef27cbc..7822453 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -67,7 +67,7 @@ declare void @exit(i32) noreturn
 ; DAG Combiner can't fold this into a load of the 1'th byte.
 ; PR8757
 define i32 @test3(i32 *%P) nounwind ssp {
-  volatile store i32 128, i32* %P
+  store volatile i32 128, i32* %P
   %tmp4.pre = load i32* %P
   %phitmp = trunc i32 %tmp4.pre to i16
   %phitmp13 = shl i16 %phitmp, 8
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c3f412e..92850f2 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,10 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
 
 declare i8* @_Znwm(i32)
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
 
 declare void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/X86/no-cfi.ll b/test/CodeGen/X86/no-cfi.ll
index f9985d4..5bb9bb2 100644
--- a/test/CodeGen/X86/no-cfi.ll
+++ b/test/CodeGen/X86/no-cfi.ll
@@ -24,15 +24,11 @@ invoke.cont:
   ret void
 
 lpad:
-  %exn = call i8* @llvm.eh.exception() nounwind
-  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
   ret void
 }
 
 declare i32 @foo()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 1d09535..ae04435 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
 ; CHECK: movntps
   %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
 ; CHECK: movntdq
   %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
 ; CHECK: movntpd
   %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
 ; CHECK: movnti
   %cast3 = bitcast i8* %B to i32*
   store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
new file mode 100644
index 0000000..7c0e82f
--- /dev/null
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -0,0 +1,11 @@
+; Check the MCNullStreamer operates correctly, at least on a minimal test case.
+;
+; RUN: llc -filetype=null -o %t -march=x86 %s
+
+define void @f0()  {
+  ret void
+}
+
+define void @f1() {
+  ret void
+}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
new file mode 100644
index 0000000..8cb2c03
--- /dev/null
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK:        .section  __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK-NEXT: L_OBJC_IMAGE_INFO:
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .long  2
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 0493edc..8f1eabd 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
 
 ; ModuleID = 'ts.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -12,8 +12,8 @@ entry:
   %tmp = load i8** @p                             ; <i8*> [#uses=1]
   %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1]
   %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
-; X64: movabsq $-1, %rax
-; X64: cmpq    $-1, %rax
+; X64: movabsq $-1, [[RAX:%r..]]
+; X64: cmpq    $-1, [[RAX]]
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
new file mode 100644
index 0000000..547334c
--- /dev/null
+++ b/test/CodeGen/X86/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; X86LINUX:   .section        .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; X86LINUX:   .section        .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e42aa9d..d092916 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index c1fc041..d185af1 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -13,7 +13,7 @@
 
 define i32 @test1(i32 %X) {
         %Z = shl i32 %X, 2              ; <i32> [#uses=1]
-        volatile store i32 %Z, i32* @G
+        store volatile i32 %Z, i32* @G
         ret i32 %X
 }
 
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index 528c4bc..a379980 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s
 ; rdar://7226797
 
 ; LLVM should omit the testl and use the flags result from the orl.
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 5e18044..d48a331 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {pxor	%xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {xorps	%xmm0, %xmm0} | count 2
 
 define float @foo(<4 x float> %a) {
   %b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
new file mode 100644
index 0000000..30a5d39
--- /dev/null
+++ b/test/CodeGen/X86/personality_size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=x86_64-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=i386-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
+  ret void
+
+return:                                           ; preds = %eh_then, %entry
+  ret void
+}
+
+declare void @_Z1gv()
+
+declare i32 @__gxx_personality_v0(...)
+
+; X64:      .size	DW.ref.__gxx_personality_v0, 8
+; X64:      .quad	__gxx_personality_v0
+
+; X32:      .size	DW.ref.__gxx_personality_v0, 4
+; X32:      .long	__gxx_personality_v0
+
diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll
new file mode 100644
index 0000000..62d85f7
--- /dev/null
+++ b/test/CodeGen/X86/phaddsub.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX
+
+; SSSE3: phaddw1:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw1:
+; AVX: vphaddw
+define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %r = add <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phaddw2:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw2:
+; AVX: vphaddw
+define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
+  %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
+  %r = add <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phaddd1:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd1:
+; AVX: vphaddd
+define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd2:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd2:
+; AVX: vphaddd
+define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+  %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd3:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd3:
+; AVX: vphaddd
+define <4 x i32> @phaddd3(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd4:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd4:
+; AVX: vphaddd
+define <4 x i32> @phaddd4(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd5:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd5:
+; AVX: vphaddd
+define <4 x i32> @phaddd5(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd6:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd6:
+; AVX: vphaddd
+define <4 x i32> @phaddd6(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd7:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd7:
+; AVX: vphaddd
+define <4 x i32> @phaddd7(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubw1:
+; SSSE3-NOT: vphsubw
+; SSSE3: phsubw
+; AVX: phsubw1:
+; AVX: vphsubw
+define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %r = sub <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phsubd1:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd1:
+; AVX: vphsubd
+define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd2:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd2:
+; AVX: vphsubd
+define <4 x i32> @phsubd2(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd3:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd3:
+; AVX: vphsubd
+define <4 x i32> @phsubd3(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd4:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd4:
+; AVX: vphsubd
+define <4 x i32> @phsubd4(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fb60ac2..fc06309 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
 
 @ptr = external global i32* 
 @dst = external global i32 
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
new file mode 100644
index 0000000..cc1df2f
--- /dev/null
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: SHUFF0
+define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <8 x i32> <i32 2, i32 7, i32 1, i32 2, i32 4, i32 5, i32 1, i32 1>
+;CHECK: pshufd
+  ret <8 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF1
+define <4 x i32*> @SHUFF1(<4 x i32*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <4 x i32> <i32 2, i32 7, i32 7, i32 2>
+;CHECK: pshufd
+  ret <4 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF3
+define <4 x i8*> @SHUFF3(<4 x i8*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i8*> %ptrv, <4 x i8*> undef, <4 x i32> <i32 2, i32 7, i32 1, i32 2>
+;CHECK: pshufd
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD0
+define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movaps
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD1
+define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movdqa
+;CHECK: pshufd
+;CHECK: movdqa
+  %T = shufflevector <4 x i8*> %G, <4 x i8*> %G, <4 x i32> <i32 7, i32 1, i32 4, i32 3>
+  store <4 x i8*> %T, <4 x i8*>* %p
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD2
+define <4 x i8*> @LOAD2(<4 x i8*>* %p) nounwind {
+entry:
+  %I = alloca <4 x i8*>
+;CHECK: sub
+  %G = load <4 x i8*>* %p
+;CHECK: movaps
+  store <4 x i8*> %G, <4 x i8*>* %I
+;CHECK: movaps
+  %Z = load <4 x i8*>* %I
+  ret <4 x i8*> %Z
+;CHECK: add
+;CHECK: ret
+}
+
+;CHECK: INT2PTR0
+define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movl
+;CHECK: movaps
+  %K = ptrtoint <4 x i8*> %G to <4 x i32>
+;CHECK: ret
+  ret <4 x i32> %K
+}
+
+;CHECK: INT2PTR1
+define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
+entry:
+  %G = load <4 x i8>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pshufb
+;CHECK: pand
+  %K = inttoptr <4 x i8> %G to <4 x i32*>
+;CHECK: ret
+  ret <4 x i32*> %K
+}
+
+;CHECK: BITCAST0
+define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movl
+  %T = bitcast <4 x i8*> %G to <4 x i32*>
+;CHECK: movaps
+;CHECK: ret
+  ret <4 x i32*> %T
+}
+
+;CHECK: BITCAST1
+define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
+entry:
+  %G = load <2 x i8*>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pinsrd
+  %T = bitcast <2 x i8*> %G to <2 x i32*>
+;CHECK: ret
+  ret <2 x i32*> %T
+}
+
+;CHECK: ICMP0
+define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+  %g0 = load <4 x i8*>* %p0
+  %g1 = load <4 x i8*>* %p1
+  %k = icmp sgt <4 x i8*> %g0, %g1
+  ;CHECK: pcmpgtd
+  %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+  ret <4 x i32> %j
+  ;CHECK: ret
+}
+
+;CHECK: ICMP1
+define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+  %g0 = load <4 x i8*>* %p0
+  %g1 = load <4 x i8*>* %p1
+  %k = icmp eq <4 x i8*> %g0, %g1
+  ;CHECK: pcmpeqd
+  %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+  ret <4 x i32> %j
+  ;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll
new file mode 100644
index 0000000..13070d1
--- /dev/null
+++ b/test/CodeGen/X86/pr11202.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+@bb = constant [1 x i8*] [i8* blockaddress(@main, %l2)]
+
+define void @main() {
+entry:
+  br label %l1
+
+l1:                                               ; preds = %l2, %entry
+  %a = zext i1 false to i32
+  br label %l2
+
+l2:                                               ; preds = %l1
+  %b = zext i1 false to i32
+  br label %l1
+}
+
+; CHECK: .Ltmp0:                                 # Address of block that was removed by CodeGen
+; CHECK: .quad	.Ltmp0
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
new file mode 100644
index 0000000..e1fa032
--- /dev/null
+++ b/test/CodeGen/X86/pr11415.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s
+
+; We used to consider the early clobber in the second asm statement as
+; defining %0 before it was read. This caused us to omit the
+; movq	-8(%rsp), %rdx
+
+; CHECK: 	#APP
+; CHECK-NEXT:	#NO_APP
+; CHECK-NEXT:	movq	%rcx, %rax
+; CHECK-NEXT:	movq	%rax, -8(%rsp)
+; CHECK-NEXT:	movq	-8(%rsp), %rdx
+; CHECK-NEXT:	#APP
+; CHECK-NEXT:	#NO_APP
+; CHECK-NEXT:	movq	%rdx, %rax
+; CHECK-NEXT:	movq	%rdx, -8(%rsp)
+; CHECK-NEXT:	ret
+
+define i64 @foo() {
+entry:
+  %0 = tail call i64 asm "", "={cx}"() nounwind
+  %1 = tail call i64 asm "", "=&r,0,r,~{rax}"(i64 %0, i64 %0) nounwind
+  ret i64 %1
+}
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
new file mode 100644
index 0000000..f29e50e
--- /dev/null
+++ b/test/CodeGen/X86/pr12360.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define zeroext i1 @f1(i8* %x) {
+; CHECK: f1:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = trunc i8 %0 to i1
+  ret i1 %tobool
+}
+
+define zeroext i1 @f2(i8* %x) {
+; CHECK: f2:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = icmp ne i8 %0, 0
+  ret i1 %tobool
+}
+
+!0 = metadata !{i8 0, i8 2}
+
+
+; check that we don't build a "trunc" from i1 to i1, which would assert.
+define zeroext i1 @f3(i1 %x) {
+; CHECK: f3:
+
+entry:
+  %tobool = icmp ne i1 %x, 0
+  ret i1 %tobool
+}
+
+; check that we don't build a trunc when other bits are needed
+define zeroext i1 @f4(i32 %x) {
+; CHECK: f4:
+; CHECK: and
+
+entry:
+  %y = and i32 %x, 32768
+  %z = icmp ne i32 %y, 0
+  ret i1 %z
+}
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 945ec4c..9b0ef83 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 define i32 @main() {
 entry:
 ; CHECK: flds
-	%tmp6 = volatile load float* @a		; <float> [#uses=1]
+	%tmp6 = load volatile float* @a		; <float> [#uses=1]
 ; CHECK: fstps (%esp)
 ; CHECK: tanf
 	%tmp9 = tail call float @tanf( float %tmp6 )		; <float> [#uses=1]
@@ -41,7 +41,7 @@ entry:
 ; CHECK: fstp
 
 ; CHECK: fldl
-	%tmp12 = volatile load double* @b		; <double> [#uses=1]
+	%tmp12 = load volatile double* @b		; <double> [#uses=1]
 ; CHECK: fstpl (%esp)
 ; CHECK: tan
 	%tmp13 = tail call double @tan( double %tmp12 )		; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 2a8bb35..02a3605 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -15,17 +15,17 @@ define void @loop_2() nounwind  {
 ; CHECK-NEXT: addl $3, (%{{.*}})
 ; CHECK-NEXT: ret
 
-  %tmp = volatile load i32* @x, align 4           ; <i32> [#uses=1]
+  %tmp = load volatile i32* @x, align 4           ; <i32> [#uses=1]
   %tmp1 = add i32 %tmp, 3         ; <i32> [#uses=1]
-  volatile store i32 %tmp1, i32* @x, align 4
-  %tmp.1 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1, i32* @x, align 4
+  %tmp.1 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.1 = add i32 %tmp.1, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.1, i32* @x, align 4
-  %tmp.2 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1.1, i32* @x, align 4
+  %tmp.2 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.2 = add i32 %tmp.2, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.2, i32* @x, align 4
-  %tmp.3 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1.2, i32* @x, align 4
+  %tmp.3 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.3 = add i32 %tmp.3, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.3, i32* @x, align 4
+  store volatile i32 %tmp1.3, i32* @x, align 4
   ret void
 }
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
deleted file mode 100644
index a4204e5..0000000
--- a/test/CodeGen/X86/pr3495-2.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
-; PR3495
-;
-; This test may not be testing what it was supposed to test.
-; It used to have two spills and four reloads, but not it only has one spill and one reload.
-
-target datalayout = "e-p:32:32:32"
-target triple = "i386-apple-darwin9.6"
-	%struct.constraintVCGType = type { i32, i32, i32, i32 }
-	%struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
-
-define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind {
-entry:
-	%0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5		; <i32*> [#uses=2]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
-	br i1 %2, label %bb5, label %bb.nph3
-
-bb.nph3:		; preds = %entry
-	%3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4		; <%struct.constraintVCGType**> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb3, %bb.nph3
-	%s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ]		; <i32> [#uses=2]
-	%4 = load %struct.constraintVCGType** %3, align 4		; <%struct.constraintVCGType*> [#uses=1]
-	%5 = icmp eq i32 0, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb1, label %bb3
-
-bb1:		; preds = %bb
-	%6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=2]
-	%8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=1]
-	%10 = icmp eq i32 %9, 0		; <i1> [#uses=1]
-	br i1 %10, label %bb2, label %bb3
-
-bb2:		; preds = %bb1
-	%11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4		; <%struct.constraintVCGType**> [#uses=0]
-	br label %bb.i
-
-bb.i:		; preds = %bb.i, %bb2
-	br label %bb.i
-
-bb3:		; preds = %bb1, %bb
-	%12 = add i32 %s.02, 1		; <i32> [#uses=2]
-	%13 = load i32* %0, align 4		; <i32> [#uses=1]
-	%14 = icmp ugt i32 %13, %12		; <i1> [#uses=1]
-	br i1 %14, label %bb, label %bb5
-
-bb5:		; preds = %bb3, %entry
-	%15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6		; <i32*> [#uses=1]
-	store i32 %label, i32* %15, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
deleted file mode 100644
index 7efd35b..0000000
--- a/test/CodeGen/X86/pr3495.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of loads added} | grep 2
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of machine instrs printed} | grep 34
-; PR3495
-;
-; Note: this should not spill at all with either good LSR or good regalloc.
-
-target triple = "i386-pc-linux-gnu"
-@x = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=1]
-@rows = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=2]
-@up = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=2]
-@down = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=1]
-
-define i32 @queens(i32 %c) nounwind {
-entry:
-	%tmp91 = add i32 %c, 1		; <i32> [#uses=3]
-	%tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91		; <i32*> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb569, %entry
-	%r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ]		; <i32> [#uses=4]
-	%tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp28 = load i32* %tmp27, align 4		; <i32> [#uses=1]
-	%tmp29 = icmp eq i32 %tmp28, 0		; <i1> [#uses=1]
-	br i1 %tmp29, label %bb569, label %bb31
-
-bb31:		; preds = %bb
-	%tmp35 = sub i32 %r25.0.reg2mem.0, 0		; <i32> [#uses=1]
-	%tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35		; <i32*> [#uses=1]
-	%tmp37 = load i32* %tmp36, align 4		; <i32> [#uses=1]
-	%tmp38 = icmp eq i32 %tmp37, 0		; <i1> [#uses=1]
-	br i1 %tmp38, label %bb569, label %bb41
-
-bb41:		; preds = %bb31
-	%tmp54 = sub i32 %r25.0.reg2mem.0, %c		; <i32> [#uses=1]
-	%tmp55 = add i32 %tmp54, 7		; <i32> [#uses=1]
-	%tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55		; <i32*> [#uses=2]
-	store i32 0, i32* %tmp62, align 4
-	br label %bb92
-
-bb92:		; preds = %bb545, %bb41
-	%r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ]		; <i32> [#uses=5]
-	%tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=0]
-	%tmp112 = add i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=1]
-	%tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112		; <i32*> [#uses=2]
-	%tmp114 = load i32* %tmp113, align 4		; <i32> [#uses=1]
-	%tmp115 = icmp eq i32 %tmp114, 0		; <i1> [#uses=1]
-	br i1 %tmp115, label %bb545, label %bb118
-
-bb118:		; preds = %bb92
-	%tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=0]
-	store i32 0, i32* %tmp113, align 4
-	store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
-	br label %bb142
-
-bb142:		; preds = %bb142, %bb118
-	%k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ]		; <i32> [#uses=1]
-	%indvar.next709 = add i32 %k18.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond710 = icmp eq i32 %indvar.next709, 8		; <i1> [#uses=1]
-	br i1 %exitcond710, label %bb155, label %bb142
-
-bb155:		; preds = %bb142
-	%tmp156 = tail call i32 @putchar(i32 10) nounwind		; <i32> [#uses=0]
-	br label %bb545
-
-bb545:		; preds = %bb155, %bb92
-	%indvar.next711 = add i32 %r20.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond712 = icmp eq i32 %indvar.next711, 8		; <i1> [#uses=1]
-	br i1 %exitcond712, label %bb553, label %bb92
-
-bb553:		; preds = %bb545
-	store i32 1, i32* %tmp62, align 4
-	br label %bb569
-
-bb569:		; preds = %bb553, %bb31, %bb
-	%indvar.next715 = add i32 %r25.0.reg2mem.0, 1		; <i32> [#uses=1]
-	br label %bb
-}
-
-declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ebe11a5..ec2f302 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; rdar://10538297
 
 define void @t(i8* %ptr) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
new file mode 100644
index 0000000..8b30dc7
--- /dev/null
+++ b/test/CodeGen/X86/promote.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; CHECK: mul_f
+define i32 @mul_f(<4 x i8>* %A) {
+entry:
+; CHECK: pmul
+; CHECK-NOT: mulb
+  %0 = load <4 x i8>* %A, align 8
+  %mul = mul <4 x i8> %0, %0
+  store <4 x i8> %mul, <4 x i8>* undef
+  ret i32 0
+; CHECK: ret
+}
+
+
+; CHECK: shuff_f
+define i32 @shuff_f(<4 x i8>* %A) {
+entry:
+; CHECK: pshufb
+; CHECK: paddd
+; CHECK: pshufb
+  %0 = load <4 x i8>* %A, align 8
+  %add = add <4 x i8> %0, %0
+  store <4 x i8> %add, <4 x i8>* undef
+  ret i32 0
+; CHECK: ret
+}
+
+; CHECK: bitcast_widen
+define <2 x float> @bitcast_widen(<4 x i32> %in) nounwind readnone {
+entry:
+; CHECK-NOT: pshufd
+ %x = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %y = bitcast <2 x i32> %x to <2 x float>
+ ret <2 x float> %y
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
new file mode 100644
index 0000000..faca3d7
--- /dev/null
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+%struct.obj = type { i64 }
+
+; CHECK: _Z7releaseP3obj
+define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
+entry:
+; CHECK: decq	(%{{rdi|rcx}})
+; CHECK-NEXT: je
+  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
+  %0 = load i64* %refcnt, align 8, !tbaa !0
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+  %1 = bitcast %struct.obj* %o to i8*
+  tail call void @free(i8* %1)
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  ret void
+}
+
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @free(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+
+%struct.obj2 = type { i64, i32, i16, i8 }
+
+declare void @other(%struct.obj2* ) nounwind;
+
+; CHECK: example_dec
+define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit dec
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: decq ({{.*}})
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %s64, align 8
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit dec
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: decl {{[0-9][0-9]*}}({{.*}})
+  %dec1 = add i32 %1, -1
+  store i32 %dec1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %dec1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit dec
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: decw {{[0-9][0-9]*}}({{.*}})
+  %dec2 = add i16 %2, -1
+  store i16 %dec2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %dec2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit dec
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: decb {{[0-9][0-9]*}}({{.*}})
+  %dec3 = add i8 %3, -1
+  store i8 %dec3, i8* %s8
+  %tobool4 = icmp eq i8 %dec3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end4, %if.end, %entry                                                                                                                                                                               
+  ret void
+}
+
+; CHECK: example_inc
+define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit inc
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: incq ({{.*}})
+  %inc = add i64 %0, 1
+  store i64 %inc, i64* %s64, align 8
+  %tobool = icmp eq i64 %inc, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit inc
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: incl {{[0-9][0-9]*}}({{.*}})
+  %inc1 = add i32 %1, 1
+  store i32 %inc1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %inc1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit inc
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: incw {{[0-9][0-9]*}}({{.*}})
+  %inc2 = add i16 %2, 1
+  store i16 %inc2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %inc2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit inc
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: incb {{[0-9][0-9]*}}({{.*}})
+  %inc3 = add i8 %3, 1
+  store i8 %inc3, i8* %s8
+  %tobool4 = icmp eq i8 %inc3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d936971..d99a7a4 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
 ; CHECK: f0:
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 9557d17..f092163 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: grep subq %t | count 1
 ; RUN: grep addq %t | count 1
 
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
index 87f65ed..6759115 100644
--- a/test/CodeGen/X86/reghinting.ll
+++ b/test/CodeGen/X86/reghinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s
 ; PR10221
 
 ;; The registers %x and %y must both spill across the finit call.
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index f6f0ed1..75f438d 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
 ; RUN: not grep xor %t
 ; RUN: not grep movap %t
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
new file mode 100644
index 0000000..0dd74ea
--- /dev/null
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+
+define float @test1(float %x) nounwind  {
+  %call = tail call float @floorf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test1:
+; CHECK-SSE: roundss $1
+
+; CHECK-AVX: test1:
+; CHECK-AVX: vroundss $1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind  {
+  %call = tail call double @floor(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test2:
+; CHECK-SSE: roundsd $1
+
+; CHECK-AVX: test2:
+; CHECK-AVX: vroundsd $1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind  {
+  %call = tail call float @nearbyintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test3:
+; CHECK-SSE: roundss $12
+
+; CHECK-AVX: test3:
+; CHECK-AVX: vroundss $12
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind  {
+  %call = tail call double @nearbyint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test4:
+; CHECK-SSE: roundsd $12
+
+; CHECK-AVX: test4:
+; CHECK-AVX: vroundsd $12
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind  {
+  %call = tail call float @ceilf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test5:
+; CHECK-SSE: roundss $2
+
+; CHECK-AVX: test5:
+; CHECK-AVX: vroundss $2
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind  {
+  %call = tail call double @ceil(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test6:
+; CHECK-SSE: roundsd $2
+
+; CHECK-AVX: test6:
+; CHECK-AVX: vroundsd $2
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test7(float %x) nounwind  {
+  %call = tail call float @rintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test7:
+; CHECK-SSE: roundss $4
+
+; CHECK-AVX: test7:
+; CHECK-AVX: vroundss $4
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test8(double %x) nounwind  {
+  %call = tail call double @rint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test8:
+; CHECK-SSE: roundsd $4
+
+; CHECK-AVX: test8:
+; CHECK-AVX: vroundsd $4
+}
+
+declare double @rint(double) nounwind readnone
+
+define float @test9(float %x) nounwind  {
+  %call = tail call float @truncf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test9:
+; CHECK-SSE: roundss $3
+
+; CHECK-AVX: test9:
+; CHECK-AVX: vroundss $3
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind  {
+  %call = tail call double @trunc(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test10:
+; CHECK-SSE: roundsd $3
+
+; CHECK-AVX: test10:
+; CHECK-AVX: vroundsd $3
+}
+
+declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index adc58ac..e99ea93 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -3,9 +3,10 @@
 ; Verify when widening a divide/remainder operation, we only generate a
 ; divide/rem per element since divide/remainder can trap.
 
+; CHECK: vectorDiv
 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
-; CHECK: idivl
-; CHECK: idivl
+; CHECK: idivq
+; CHECK: idivq
 ; CHECK-NOT: idivl
 ; CHECK: ret
 entry:
@@ -32,6 +33,7 @@ entry:
   ret void
 }
 
+; CHECK: test_char_div
 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: idivb
 ; CHECK: idivb
@@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
   ret <3 x i8>  %div.r
 }
 
+; CHECK: test_uchar_div
 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: divb
 ; CHECK: divb
@@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
   ret <3 x i8>  %div.r
 }
 
+; CHECK: test_short_div
 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
   ret <5 x i16>  %div.r
 }
 
+; CHECK: test_ushort_div
 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK-NOT: divw
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
 ; CHECK: ret
   %div.r = udiv <4 x i16> %num, %div
   ret <4 x i16>  %div.r
 }
 
+; CHECK: test_uint_div
 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
 ; CHECK: divl
 ; CHECK: divl
@@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
   ret <3 x i32>  %div.r
 }
 
+; CHECK: test_long_div
 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: idivq
 ; CHECK: idivq
@@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
   ret <3 x i64>  %div.r
 }
 
+; CHECK: test_ulong_div
 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: divq
 ; CHECK: divq
@@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
   ret <3 x i64>  %div.r
 }
 
-
+; CHECK: test_char_rem
 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK-NOT: idivb
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
 ; CHECK: ret
   %rem.r = srem <4 x i8> %num, %rem
   ret <4 x i8>  %rem.r
 }
 
+; CHECK: test_short_rem
 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
   ret <5 x i16>  %rem.r
 }
 
+; CHECK: test_uint_rem
 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
 ; CHECK: idivl
 ; CHECK: idivl
@@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
 }
 
 
+; CHECK: test_ulong_rem
 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
 ; CHECK: divq
 ; CHECK: divq
@@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
   ret <5 x i64>  %rem.r
 }
 
+; CHECK: test_int_div
 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
 ; CHECK: idivl
 ; CHECK: idivl
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
new file mode 100644
index 0000000..5ce08aa
--- /dev/null
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+        %mem = alloca i32, i32 %l
+        call void @dummy_use (i32* %mem, i32 %l)
+        %terminate = icmp eq i32 %l, 0
+        br i1 %terminate, label %true, label %false
+
+true:
+        ret i32 0
+
+false:
+        %newlen = sub i32 %l, 1
+        %retvalue = call i32 @test_basic(i32 %newlen)
+        ret i32 %retvalue
+
+; X32:      test_basic:
+
+; X32:      cmpl %gs:48, %esp
+; X32-NEXT: ja      .LBB0_2
+
+; X32:      pushl $4
+; X32-NEXT: pushl $12
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X32:      movl %esp, %eax
+; X32-NEXT: subl %ecx, %eax
+; X32-NEXT: cmpl %eax, %gs:48
+
+; X32:      movl %eax, %esp
+
+; X32:      subl $12, %esp
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll __morestack_allocate_stack_space
+; X32-NEXT: addl $16, %esp
+
+; X64:      test_basic:
+
+; X64:      cmpq %fs:112, %rsp
+; X64-NEXT: ja      .LBB0_2
+
+; X64:      movabsq $24, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+; X64:      movq %rsp, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq %rdi, %fs:112
+
+; X64:      movq %rdi, %rsp
+
+; X64:      movq %rax, %rdi
+; X64-NEXT: callq __morestack_allocate_stack_space
+; X64-NEXT: movq %rax, %rdi
+
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index ecdb00d..5407b87 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,60 +1,97 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux  -segmented-stacks | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
+
+; X64-Solaris: Segmented stacks not supported on this platform
+; X64-MinGW: Segmented stacks not supported on this platform
+; X32-FreeBSD: Segmented stacks not supported on FreeBSD i386
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
-        %mem = alloca i32, i32 %l
-        call void @dummy_use (i32* %mem, i32 %l)
-        %terminate = icmp eq i32 %l, 0
-        br i1 %terminate, label %true, label %false
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+
+; X32-Linux:       test_basic:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB0_2
 
-true:
-        ret i32 0
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
 
-false:
-        %newlen = sub i32 %l, 1
-        %retvalue = call i32 @test_basic(i32 %newlen)
-        ret i32 %retvalue
+; X64-Linux:       test_basic:
 
-; X32:      test_basic:
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB0_2
 
-; X32:      leal -12(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
 
-; X32:      subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $12
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret 
+; X32-Darwin:      test_basic:
 
-; X32:      movl %eax, %esp
+; X32-Darwin:      movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja      LBB0_2
 
-; X32:      subl $12, %esp
-; X32-NEXT: pushl %ecx
-; X32-NEXT: calll __morestack_allocate_stack_space
-; X32-NEXT: addl $16, %esp
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X64:      test_basic:
+; X64-Darwin:      test_basic:
 
-; X64:      leaq -24(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB0_2
 
-; X64:      movabsq $24, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
 
-; X64:      movq %rsp, %rax
-; X64-NEXT: subq %rcx, %rax
-; X64-NEXT: cmpq %rax, %fs:112
+; X32-MinGW:       test_basic:
 
-; X64:      movq %rax, %rsp
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB0_2
 
-; X64:      movq %rcx, %rdi
-; X64-NEXT: callq __morestack_allocate_stack_space
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_basic:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB0_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
 
 }
 
@@ -63,25 +100,286 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
        %result = add i32 %other, %addend
        ret i32 %result
 
-; X32:      leal (%esp), %edx
-; X32-NEXT: cmpl %gs:48, %edx
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB1_2
+
+; X32-Linux:       pushl $4
+; X32-Linux-NEXT:  pushl $0
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB1_2
+
+; X64-Linux:       movq %r10, %rax
+; X64-Linux-NEXT:  movabsq $0, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+; X64-Linux-NEXT:  movq %rax, %r10
+
+; X32-Darwin:      movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja      LBB1_2
+
+; X32-Darwin:      pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB1_2
+
+; X64-Darwin:      movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB1_2
+
+; X32-MinGW:       pushl $4
+; X32-MinGW-NEXT:  pushl $0
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB1_2
+
+; X64-FreeBSD:       movq %r10, %rax
+; X64-FreeBSD-NEXT:  movabsq $0, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+; X64-FreeBSD-NEXT:  movq %rax, %r10
+
+}
+
+define void @test_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       leal -40012(%esp), %ecx
+; X32-Linux-NEXT:  cmpl %gs:48, %ecx
+; X32-Linux-NEXT:  ja      .LBB2_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB2_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja      LBB2_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB2_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       leal -40008(%esp), %ecx
+; X32-MinGW-NEXT:  cmpl %fs:20, %ecx
+; X32-MinGW-NEXT:  ja      LBB2_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB2_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+        ret void
+
+; X32-Linux:       test_fastcc:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB3_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc:
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB3_2
+
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc:
+
+; X32-Darwin:      movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja      LBB3_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc:
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB3_2
+
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc:
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB3_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB3_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       test_fastcc_large:
+
+; X32-Linux:       leal -40012(%esp), %eax
+; X32-Linux-NEXT:  cmpl %gs:48, %eax
+; X32-Linux-NEXT:  ja      .LBB4_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc_large:
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB4_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc_large:
+
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja      LBB4_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc_large:
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB4_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc_large:
+
+; X32-MinGW:       leal -40008(%esp), %eax
+; X32-MinGW-NEXT:  cmpl %fs:20, %eax
+; X32-MinGW-NEXT:  ja      LBB4_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc_large:
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB4_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 %a)
+        ret void
 
+; This is testing that the Mac implementation preserves ecx
 
-; X32:      subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret
+; X32-Darwin:      test_fastcc_large_with_ecx_arg:
 
-; X64:      leaq (%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja      LBB5_2
 
-; X64:      movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64:      movq %rax, %r10
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
 }
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
index b2b9f81..a128af9 100644
--- a/test/CodeGen/X86/sext-subreg.ll
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -8,10 +8,10 @@ define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
 ; CHECK: movl %eax
   %C = add i64 %A, %B
   %D = trunc i64 %C to i32
-  volatile store i32 %D, i32* %P
+  store volatile i32 %D, i32* %P
   %E = shl i64 %C, 32
   %F = ashr i64 %E, 32  
-  volatile store i64 %F, i64 *%P2
-  volatile store i32 %D, i32* %P
+  store volatile i64 %F, i64 *%P2
+  store volatile i32 %D, i32* %P
   ret i64 undef
 }
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2..b747cc5 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86    | grep and | count 2
 ; RUN: llc < %s -march=x86-64 | not grep and 
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
        ret i32 %res
 }
 
+define i32 @t2(i32 %t, i32 %val) nounwind {
+       %shamt = and i32 %t, 63
+       %res = shl i32 %val, %shamt
+       ret i32 %res
+}
+
 @X = internal global i16 0
 
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
        %shamt = and i16 %t, 31
        %tmp = load i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
        ret void
 }
 
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+       %shamt = and i64 %t, 191
+       %res = lshr i64 %val, %shamt
+       ret i64 %res
+}
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index e443ac1..51f8303 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s | not grep shrl
+; RUN: llc -march=x86 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-@array = weak global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+@array = weak global [4 x i32] zeroinitializer
+
+define i32 @test_lshr_and(i32 %x) {
+; CHECK: test_lshr_and:
+; CHECK-NOT: shrl
+; CHECK: andl $12,
+; CHECK: movl {{.*}}array{{.*}},
+; CHECK: ret
 
-define i32 @foo(i32 %x) {
 entry:
-	%tmp2 = lshr i32 %x, 2		; <i32> [#uses=1]
-	%tmp3 = and i32 %tmp2, 3		; <i32> [#uses=1]
-	%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
-	ret i32 %tmp5
+  %tmp2 = lshr i32 %x, 2
+  %tmp3 = and i32 %tmp2, 3
+  %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
+  %tmp5 = load i32* %tmp4, align 4
+  ret i32 %tmp5
 }
 
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d9c3061..3ea6011 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,28 +1,70 @@
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep {s\[ah\]\[rl\]l} | count 1
-
-define i32* @test1(i32* %P, i32 %X) nounwind {
-        %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
-        %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
-        ret i32* %P2
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32* @test1(i32* %P, i32 %X) {
+; CHECK: test1:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = lshr i32 %X, 2
+  %gep.upgrd.1 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+  ret i32* %P2
 }
 
-define i32* @test2(i32* %P, i32 %X) nounwind {
-        %Y = shl i32 %X, 2              ; <i32> [#uses=1]
-        %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test2(i32* %P, i32 %X) {
+; CHECK: test2:
+; CHECK: shll $4
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = shl i32 %X, 2
+  %gep.upgrd.2 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+  ret i32* %P2
 }
 
-define i32* @test3(i32* %P, i32 %X) nounwind {
-        %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
-        %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test3(i32* %P, i32 %X) {
+; CHECK: test3:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = ashr i32 %X, 2
+  %P2 = getelementptr i32* %P, i32 %Y
+  ret i32* %P2
 }
 
-define fastcc i32 @test4(i32* %d) nounwind {
+define fastcc i32 @test4(i32* %d) {
+; CHECK: test4:
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
   %tmp4 = load i32* %d
   %tmp512 = lshr i32 %tmp4, 24
   ret i32 %tmp512
 }
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %val.zext = zext i32 %val to i64
+  %sum = add i64 %val.zext, %index.zext
+  ret i64 %sum
+}
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
new file mode 100644
index 0000000..f00058a
--- /dev/null
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
+
+; Make sure that we don't generate an illegal i64 extract after LegalizeType.
+; CHECK: shll
+
+
+define void @test_cl(<4 x i64>*  %dst, <4 x i64>* %src, i32 %idx) {
+entry:
+  %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
+  %0 = load <4 x i64> * %arrayidx, align 32
+  %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
+  %1 = load <4 x i64> * %arrayidx1, align 32
+  %2 = extractelement <4 x i64> %1, i32 0
+  %and = and i64 %2, 63
+  %3 = insertelement <4 x i64> undef, i64 %and, i32 0    
+  %splat = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i64> %0, %splat
+  store <4 x i64> %shl, <4 x i64> * %arrayidx1, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 9d74121..937817e 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2  | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse3 | FileCheck %s --check-prefix=X64_BAD
 
 ; Sibcall optimization of expanded libcalls.
 ; rdar://8707777
@@ -29,3 +30,31 @@ entry:
 declare float @sinf(float) nounwind readonly
 
 declare double @sin(double) nounwind readonly
+
+; rdar://10930395
+%0 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable optsize ssp {
+; X64_BAD: foo
+; X64_BAD: call
+; X64_BAD: call
+; X64_BAD: call
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+  %2 = bitcast %0* %self to i8*
+  %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
+  %4 = extractvalue { double, double } %3, 0
+  %5 = extractvalue { double, double } %3, 1
+  %6 = tail call double @floor(double %4) optsize
+  %7 = tail call double @floor(double %5) optsize
+  %insert.i.i = insertvalue { double, double } undef, double %6, 0
+  %insert5.i.i = insertvalue { double, double } %insert.i.i, double %7, 1
+  ret { double, double } %insert5.i.i
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare double @floor(double) optsize
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 2b13029..81a072f 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
 ; rdar://7434544
 
-define <2 x i64> @t2() nounwind ssp {
+define <2 x i64> @t2() nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: pshufd	$85, (%esp), %xmm0
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
deleted file mode 100644
index b945530..0000000
--- a/test/CodeGen/X86/sret.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep ret | grep 4
-
-	%struct.foo = type { [4 x i32] }
-
-define void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
-entry:
-	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
-	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
-	store i32 1, i32* %tmp3, align 8
-        ret void
-}
-
-@dst = external global i32
-
-define void @foo() nounwind {
-	%memtmp = alloca %struct.foo, align 4
-        call void @bar( %struct.foo* sret %memtmp ) nounwind
-        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
-	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
-        %tmp6 = load i32* %tmp5
-        store i32 %tmp6, i32* @dst
-        ret void
-}
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 04f2161..b6b0471 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     movapd
 ; CHECK:     movaps
-; CHECK-NOT:     movaps
-; CHECK:     movapd
+; CHECK-NOT:     movapd
+; CHECK:     movaps
 ; CHECK-NOT:     movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
new file mode 100644
index 0000000..d1e07c8
--- /dev/null
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+; CHECK: f
+;
+; This function contains load / store / and operations that all can execute in
+; any domain.  The only domain-specific operation is the %add = shl... operation
+; which is <4 x i32>.
+;
+; The paddd instruction can only influence the other operations through the loop
+; back-edge. Check that everything is still moved into the integer domain.
+
+define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp {
+entry:
+  br label %while.body
+
+; Materialize a zeroinitializer and a constant-pool load in the integer domain.
+; The order is not important.
+; CHECK: pxor
+; CHECK: movdqa
+
+; The instructions in the loop must all be integer domain as well.
+; CHECK: while.body
+; CHECK: pand
+; CHECK: movdqa
+; CHECK: movdqa
+; Finally, the controlling integer-only instruction.
+; CHECK: paddd
+while.body:
+  %p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+  %n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
+  %dec = add nsw i32 %n.addr.03, -1
+  %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
+  %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
+  store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
+  %0 = load <4 x i32>* %incdec.ptr, align 16
+  %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+; CHECK: f2
+; CHECK: for.body
+;
+; This loop contains two cvtsi2ss instructions that update the same xmm
+; register.  Verify that the execution dependency fix pass breaks those
+; dependencies by inserting xorps instructions.
+;
+; If the register allocator chooses different registers for the two cvtsi2ss
+; instructions, they are still dependent on themselves.
+; CHECK: xorps [[XMM1:%xmm[0-9]+]]
+; CHECK: , [[XMM1]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM1]]
+; CHECK: xorps [[XMM2:%xmm[0-9]+]]
+; CHECK: , [[XMM2]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM2]]
+;
+define float @f2(i32 %m) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %m, 0
+  br i1 %tobool3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %m.addr.07 = phi i32 [ %dec, %for.body ], [ %m, %entry ]
+  %s1.06 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
+  %s2.05 = phi float [ %add2, %for.body ], [ 0.000000e+00, %entry ]
+  %n.04 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
+  %conv = sitofp i32 %n.04 to float
+  %add = fadd float %s1.06, %conv
+  %conv1 = sitofp i32 %m.addr.07 to float
+  %add2 = fadd float %s2.05, %conv1
+  %inc = add nsw i32 %n.04, 1
+  %dec = add nsw i32 %m.addr.07, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %s1.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %s2.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add2, %for.body ]
+  %sub = fsub float %s1.0.lcssa, %s2.0.lcssa
+  ret float %sub
+}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index af1a73b..1112440 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -140,15 +140,15 @@ define double @ole_inverse(double %x, double %y) nounwind {
 }
 
 ; CHECK:      x_ogt:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ogt:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ogt:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ogt(double %x) nounwind {
@@ -158,15 +158,15 @@ define double @x_ogt(double %x) nounwind {
 }
 
 ; CHECK:      x_olt:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_olt:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_olt:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_olt(double %x) nounwind {
@@ -176,17 +176,17 @@ define double @x_olt(double %x) nounwind {
 }
 
 ; CHECK:      x_ogt_inverse:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ogt_inverse:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ogt_inverse:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -197,17 +197,17 @@ define double @x_ogt_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_olt_inverse:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_olt_inverse:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_olt_inverse:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -220,11 +220,11 @@ define double @x_olt_inverse(double %x) nounwind {
 ; CHECK:      x_oge:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_oge:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_oge:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_oge(double %x) nounwind {
@@ -236,11 +236,11 @@ define double @x_oge(double %x) nounwind {
 ; CHECK:      x_ole:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ole:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ole:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ole(double %x) nounwind {
@@ -252,12 +252,12 @@ define double @x_ole(double %x) nounwind {
 ; CHECK:      x_oge_inverse:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_oge_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_oge_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -270,12 +270,12 @@ define double @x_oge_inverse(double %x) nounwind {
 ; CHECK:      x_ole_inverse:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ole_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ole_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -414,11 +414,11 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK:      x_ugt:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ugt:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ugt:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ugt(double %x) nounwind {
@@ -430,11 +430,11 @@ define double @x_ugt(double %x) nounwind {
 ; CHECK:      x_ult:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_ult:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ult:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ult(double %x) nounwind {
@@ -446,12 +446,12 @@ define double @x_ult(double %x) nounwind {
 ; CHECK:      x_ugt_inverse:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ugt_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ugt_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -464,12 +464,12 @@ define double @x_ugt_inverse(double %x) nounwind {
 ; CHECK:      x_ult_inverse:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_ult_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ult_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -480,16 +480,16 @@ define double @x_ult_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_uge:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_uge:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_uge:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_uge(double %x) nounwind {
@@ -499,16 +499,16 @@ define double @x_uge(double %x) nounwind {
 }
 
 ; CHECK:      x_ule:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ule:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ule:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ule(double %x) nounwind {
@@ -518,16 +518,16 @@ define double @x_ule(double %x) nounwind {
 }
 
 ; CHECK:      x_uge_inverse:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_uge_inverse:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_uge_inverse:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -538,16 +538,16 @@ define double @x_uge_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_ule_inverse:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ule_inverse:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ule_inverse:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 56b099e..2f4317b 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
-
-
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
 
+; CHECK: vsel_float
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   %A = load <4 x float>* %v1
   %B = load <4 x float>* %v2
@@ -11,8 +13,11 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   ret void
 }
 
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
-
+; CHECK: vsel_i32
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   %A = load <4 x i32>* %v1
   %B = load <4 x i32>* %v2
@@ -21,9 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   ret void
 }
 
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
@@ -36,14 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
   ret void
 }
 
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
 
-
 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
   %A = load <4 x double>* %v1
   %B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 70e0a8a..36a0fd9 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -98,7 +98,7 @@ define void @test7() nounwind {
         ret void
         
 ; CHECK: test7:
-; CHECK:	pxor	%xmm0, %xmm0
+; CHECK:	xorps	%xmm0, %xmm0
 ; CHECK:	movaps	%xmm0, 0
 }
 
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
 ; CHECK: test11:
-; CHECK: movapd	4(%esp), %xmm0
+; CHECK: movaps	4(%esp), %xmm0
 }
 
 define void @test12() nounwind {
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 ; CHECK: test14:
-; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
 ; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8b3a317..5ea1b4d 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -16,10 +16,8 @@ entry:
 	ret void
         
 ; X64: t0:
-; X64: 	movddup	(%rsi), %xmm0
-; X64:  pshuflw	$0, %xmm0, %xmm0
-; X64:	xorl	%eax, %eax
-; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	movdqa	(%rsi), %xmm0
+; X64:	pslldq	$2, %xmm0
 ; X64:	movdqa	%xmm0, (%rdi)
 ; X64:	ret
 }
@@ -31,9 +29,8 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 	ret <8 x i16> %tmp3
         
 ; X64: t1:
-; X64: 	movl	(%rsi), %eax
 ; X64: 	movdqa	(%rdi), %xmm0
-; X64: 	pinsrw	$0, %eax, %xmm0
+; X64: 	pinsrw	$0, (%rsi), %xmm0
 ; X64: 	ret
 }
 
@@ -167,12 +164,12 @@ define internal void @t10() nounwind {
         store <4 x i16> %6, <4 x i16>* @g2, align 8
         ret void
 ; X64: 	t10:
-; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %eax
-; X64: 		unpcklpd [[X1:%xmm[0-9]+]]
-; X64: 		pshuflw	$8, [[X1]], [[X2:%xmm[0-9]+]]
-; X64: 		pinsrw	$2, %eax, [[X2]]
+; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %ecx
 ; X64: 		pextrw	$6, [[X0]], %eax
-; X64: 		pinsrw	$3, %eax, [[X2]]
+; X64: 		movlhps [[X0]], [[X0]]
+; X64: 		pshuflw	$8, [[X0]], [[X0]]
+; X64: 		pinsrw	$2, %ecx, [[X0]]
+; X64: 		pinsrw	$3, %eax, [[X0]]
 }
 
 
@@ -229,7 +226,7 @@ entry:
 }
 
 
-
+; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
 define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
         %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
@@ -250,13 +247,11 @@ entry:
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		pinsrw	$0, %eax, [[X1:%xmm[0-9]+]]
-; X64: 		pextrw	$8, [[X0:%xmm[0-9]+]], %eax
-; X64: 		pinsrw	$1, %eax, [[X1]]
-; X64: 		pextrw	$1, [[X1]], %ecx
-; X64: 		movd	[[X1]], %edx
-; X64: 		pinsrw	$0, %edx, %xmm
-; X64: 		pinsrw	$1, %eax, %xmm
+; X64: 		pextrw	$8, %xmm0, %eax
+; X64: 		pslldq	$2, %xmm0
+; X64: 		movd	%xmm0, %ecx
+; X64: 		pextrw	$1, %xmm0, %edx
+; X64: 		pinsrw	$0, %ecx, %xmm0
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 2ac4cb4..54264b1 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    insertps  $0, %xmm1, %xmm0        
 }
 
-define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_1:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sete	%al
 }
 
-define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_2:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sbbl	%eax
 }
 
-define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_3:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 }
 
 
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 ; This used to compile to insertps $0  + insertps $16.  insertps $0 is always
 ; pointless.
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 793c026..f6c13ec 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -11,13 +11,13 @@ define void @test({ double, double }* byval  %z, double* %P) nounwind {
 entry:
 	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
 	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
-        volatile store double %tmp4, double* %P
+        store volatile double %tmp4, double* %P
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp1 = volatile load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp1 = load volatile double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
     ; CHECK: andpd{{.*}}4(%esp), %xmm
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
-	volatile store double %tmp6, double* %P, align 8
+	store volatile double %tmp6, double* %P, align 8
 	ret void
 }
 
diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll
new file mode 100644
index 0000000..18cce72
--- /dev/null
+++ b/test/CodeGen/X86/stack-align2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+
+define i32 @test() nounwind {
+entry:
+  call void @test2()
+  ret i32 0
+
+; LINUX-I386:     subl	$12, %esp
+; DARWIN-I386:    subl	$12, %esp
+; NETBSD-I386-NOT: subl	{{.*}}, %esp
+
+; LINUX-X86_64:      pushq %{{.*}}
+; LINUX-X86_64-NOT:  subq	{{.*}}, %rsp
+; DARWIN-X86_64:     pushq %{{.*}}
+; DARWIN-X86_64-NOT: subq	{{.*}}, %rsp
+; NETBSD-X86_64:     pushq %{{.*}}
+; NETBSD-X86_64-NOT: subq	{{.*}}, %rsp
+}
+
+declare void @test2()
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
index 37f86c6..aea85b9 100644
--- a/test/CodeGen/X86/store-empty-member.ll
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -9,6 +9,6 @@
 
 define void @foo() nounwind {
   %1 = alloca %testType
-  volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+  store volatile %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
   ret void
 }
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 1168622..8313166 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
 
 target datalayout = "e-p:32:32"
         %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -16,9 +17,14 @@ cond_true2732.preheader:                ; preds = %entry
         store i64 %tmp2676.us.us, i64* %tmp2666
         ret i32 0
 
-; CHECK: 	and	{{E..}}, DWORD PTR [360]
-; CHECK:	and	DWORD PTR [356], {{E..}}
-; CHECK:	mov	DWORD PTR [360], {{E..}}
+; INTEL: 	and	{{E..}}, DWORD PTR [360]
+; INTEL:	and	DWORD PTR [356], {{E..}}
+; FIXME:	mov	DWORD PTR [360], {{E..}}
+; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+
+; ATT: 	andl	360, %{{e..}}
+; ATT:	andl	%{{e..}}, 356
+; ATT:	movl	%{{e..}}, 360
 
 }
 
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 1251a24..81de22c 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86            | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86            | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 4522e91..749b5db 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c5a105c..c68a8c6 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -2,8 +2,8 @@
 
 ; Test that we don't drop a block that has its address taken.
 
+; CHECK: Ltmp0:                                  ## Block address taken
 ; CHECK: Ltmp1:                                  ## Block address taken
-; CHECK: Ltmp2:                                  ## Block address taken
 
 @a = common global i32 0, align 4
 @p = common global i8* null, align 8
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index d6c16ca..f1b9f20 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -314,7 +314,7 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
   unreachable
 
 bbx:
@@ -323,7 +323,7 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
   unreachable
 
 return:
@@ -352,8 +352,8 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 bbx:
@@ -362,8 +362,8 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 return:
@@ -390,8 +390,8 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 bbx:
@@ -400,8 +400,8 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 return:
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
new file mode 100644
index 0000000..b628f5e
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -0,0 +1,40 @@
+; RUN: llc -disable-tail-calls < %s | FileCheck --check-prefix=CALL %s
+; RUN: llc < %s | FileCheck --check-prefix=JMP %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @helper() nounwind {
+entry:
+  ret i32 7
+}
+
+define i32 @test1() nounwind {
+entry:
+  %call = tail call i32 @helper()
+  ret i32 %call
+}
+
+; CALL: test1:
+; CALL-NOT: ret
+; CALL: callq helper
+; CALL: ret
+
+; JMP: test1:
+; JMP-NOT: ret
+; JMP: jmp helper # TAILCALL
+
+define i32 @test2() nounwind {
+entry:
+  %call = tail call i32 @test2()
+  ret i32 %call
+}
+
+; CALL: test2:
+; CALL-NOT: ret
+; CALL: callq test2
+; CALL: ret
+
+; JMP: test2:
+; JMP-NOT: ret
+; JMP: jmp test2 # TAILCALL
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7ecf379..7621602 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
 
 ; FIXME: Win64 does not support byval.
 
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c18c7aa..bff5f99 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
 ; CHECK: subq  ${{24|72|80}}, %rsp
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
new file mode 100644
index 0000000..a7be483
--- /dev/null
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
+
+%class.C = type { i8 }
+%struct.S = type { i32 }
+%struct.M = type { i32, i32 }
+
+declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
+declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
+declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
+
+define void @testv() nounwind {
+; CHECK: testv:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C5SmallEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.S, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
+  ret void
+}
+
+define void @test2v() nounwind {
+; CHECK: test2v:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C6MediumEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.M, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX/EDX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
+  ret void
+}
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
new file mode 100644
index 0000000..e2e58a54
--- /dev/null
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32: f1:
+; X32:      movl %gs:i@NTPOFF, %eax
+; X32-NEXT: ret
+; X64: f1:
+; X64:      movl %fs:i@TPOFF, %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32: f2:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: leal i@NTPOFF(%eax), %eax
+; X32-NEXT: ret
+; X64: f2:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: leaq i@TPOFF(%rax), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i
+}
+
+define i32 @f3() {
+; X32: f3:
+; X32:      movl i2@INDNTPOFF, %eax
+; X32-NEXT: movl %gs:(%eax), %eax
+; X32-NEXT: ret
+; X64: f3:
+; X64:      movq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: movl %fs:(%rax), %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32: f4:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32-NEXT: ret
+; X64: f4:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
new file mode 100644
index 0000000..e8a79bf
--- /dev/null
+++ b/test/CodeGen/X86/tls.ll
@@ -0,0 +1,329 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32_LINUX: f1:
+; X32_LINUX:      movl %gs:i1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f1:
+; X64_LINUX:      movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f1:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f1:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i1
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32_LINUX: f2:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f2:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f2:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f2:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32_LINUX: f3:
+; X32_LINUX:      movl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: movl %gs:(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f3:
+; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f3:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f3:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32_LINUX: f4:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f4:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f4:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f4:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32_LINUX: f5:
+; X32_LINUX:      movl %gs:i3@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f5:
+; X64_LINUX:      movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f5:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f5:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i3
+	ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32_LINUX: f6:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f6:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f6:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f6:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i3
+}
+
+define i32 @f7() {
+; X32_LINUX: f7:
+; X32_LINUX:      movl %gs:i4@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f7:
+; X64_LINUX:      movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i4
+	ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32_LINUX: f8:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f8:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i4
+}
+
+define i32 @f9() {
+; X32_LINUX: f9:
+; X32_LINUX:      movl %gs:i5@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f9:
+; X64_LINUX:      movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i5
+	ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32_LINUX: f10:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f10:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i5
+}
+
+define i16 @f11() {
+; X32_LINUX: f11:
+; X32_LINUX:      movzwl %gs:s1@NTPOFF, %eax
+; Why is this kill line here, but no where else?
+; X32_LINUX-NEXT: # kill
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f11:
+; X64_LINUX:      movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: # kill
+; X64_LINUX-NEXT: ret
+; X32_WIN: f11:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: # kill
+; X32_WIN-NEXT: ret
+; X64_WIN: f11:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: # kill
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+	ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32_LINUX: f12:
+; X32_LINUX:      movswl %gs:s1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f12:
+; X64_LINUX:      movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f12:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f12:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+  %tmp2 = sext i16 %tmp1 to i32
+	ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32_LINUX: f13:
+; X32_LINUX:      movb %gs:b1@NTPOFF, %al
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f13:
+; X64_LINUX:      movb %fs:b1@TPOFF, %al
+; X64_LINUX-NEXT: ret
+; X32_WIN: f13:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: ret
+; X64_WIN: f13:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+	ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32_LINUX: f14:
+; X32_LINUX:      movsbl %gs:b1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f14:
+; X64_LINUX:      movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f14:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f14:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+  %tmp2 = sext i8 %tmp1 to i32
+	ret i32 %tmp2
+}
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
deleted file mode 100644
index 0cae5c4..0000000
--- a/test/CodeGen/X86/tls1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i32 15
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
deleted file mode 100644
index fb61596..0000000
--- a/test/CodeGen/X86/tls10.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
deleted file mode 100644
index 514a168..0000000
--- a/test/CodeGen/X86/tls11.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movzwl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movzwl	%fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i16 15
-
-define i16 @f() {
-entry:
-	%tmp1 = load i16* @i
-	ret i16 %tmp1
-}
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
deleted file mode 100644
index c29f6ad..0000000
--- a/test/CodeGen/X86/tls12.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
-
-@i = thread_local global i8 15
-
-define i8 @f() {
-entry:
-	%tmp1 = load i8* @i
-	ret i8 %tmp1
-}
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
deleted file mode 100644
index 08778ec..0000000
--- a/test/CodeGen/X86/tls13.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i16 0
-@j = thread_local global i16 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i16* @i, align 2
-        %1 = sext i16 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i16* @j, align 2
-        %3 = zext i16 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
deleted file mode 100644
index 88426dd..0000000
--- a/test/CodeGen/X86/tls14.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i8 0
-@j = thread_local global i8 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i8* @i, align 2
-        %1 = sext i8 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i8* @j, align 2
-        %3 = zext i8 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
deleted file mode 100644
index 7abf070..0000000
--- a/test/CodeGen/X86/tls15.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t | count 1
-; RUN: grep {leal	i@NTPOFF(%eax), %ecx} %t
-; RUN: grep {leal	j@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2 | count 1
-; RUN: grep {leaq	i@TPOFF(%rax), %rcx} %t2
-; RUN: grep {leaq	j@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 0
-@j = thread_local global i32 0
-
-define void @f(i32** %a, i32** %b) {
-entry:
-	store i32* @i, i32** %a, align 8
-	store i32* @j, i32** %b, align 8
-	ret void
-}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
deleted file mode 100644
index 5a94296..0000000
--- a/test/CodeGen/X86/tls2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
deleted file mode 100644
index 7327cc4..0000000
--- a/test/CodeGen/X86/tls3.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	i@INDNTPOFF, %eax} %t
-; RUN: grep {movl	%gs:(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
-; RUN: grep {movl	%fs:(%rax), %eax} %t2
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
deleted file mode 100644
index d2e40e3..0000000
--- a/test/CodeGen/X86/tls4.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {addl	i@INDNTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
deleted file mode 100644
index 4d2cc02..0000000
--- a/test/CodeGen/X86/tls5.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
deleted file mode 100644
index 505106e..0000000
--- a/test/CodeGen/X86/tls6.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
deleted file mode 100644
index e9116e7..0000000
--- a/test/CodeGen/X86/tls7.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
deleted file mode 100644
index 375af94..0000000
--- a/test/CodeGen/X86/tls8.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
deleted file mode 100644
index 7d08df8..0000000
--- a/test/CodeGen/X86/tls9.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a1d797f..9d58019 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,7 +5,7 @@
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s
 
 @G = external global i32
 
@@ -14,7 +14,7 @@ define i32 @test1(i32 %X) nounwind {
 ; CHECK-NOT: mov
 ; CHECK: leal 1(%rdi)
         %Z = add i32 %X, 1
-        volatile store i32 %Z, i32* @G
+        store volatile i32 %Z, i32* @G
         ret i32 %X
 }
 
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index 1dbbdcf..e853e77 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
 ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
 ; by the compiler_rt implementation of __floatundisf.
 ; <rdar://problem/8493982>
@@ -6,37 +6,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; FIXME: This test could generate this code:
-;
-; ## BB#0:                                ## %entry
-; 	testq	%rdi, %rdi
-; 	jns	LBB0_2
-; ## BB#1:
-; 	movq	%rdi, %rax
-; 	shrq	%rax
-; 	andq	$1, %rdi
-; 	orq	%rax, %rdi
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	addss	%xmm0, %xmm0
-; 	ret
-; LBB0_2:                                 ## %entry
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	ret
-;
-; The blocks come from lowering:
-;
-;   %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5
-;
-; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS
-; as live-in on the new blocks, and machine sinking would be able to sink
-; everything below the test.
-
-; CHECK: shrq
-; CHECK: andq
-; CHECK-NEXT: orq
 ; CHECK: testq %rdi, %rdi
 ; CHECK-NEXT: jns LBB0_2
-; CHECK: cvtsi2ss
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
 ; CHECK: LBB0_2
 ; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
index eeebcee..b066297 100644
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
new file mode 100644
index 0000000..af76a33
--- /dev/null
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10 | FileCheck %s
+; <rdar://problem/10655949>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = internal unnamed_addr constant [5 x i16] [i16 252, i16 98, i16 101, i16 114, i16 0], align 2
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
+
+; CHECK:         .section      __TEXT,__ustring
+; CHECK-NEXT:    .align        1
+; CHECK-NEXT: _.str:
+; CHECK-NEXT:    .short  252     ## 0xfc
+; CHECK-NEXT:    .short  98      ## 0x62
+; CHECK-NEXT:    .short  101     ## 0x65
+; CHECK-NEXT:    .short  114     ## 0x72
+; CHECK-NEXT:    .short  0       ## 0x0
+
+define i32 @main() uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+  ret i32 0
+}
+
+declare void @NSLog(%0*, ...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/X86/utf8.ll b/test/CodeGen/X86/utf8.ll
new file mode 100644
index 0000000..67bc5ae2
--- /dev/null
+++ b/test/CodeGen/X86/utf8.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: iΔ
+@"i\CE\94" = common global i32 0, align 4
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 3bee700..8655c6c 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
 ; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index b3efc7b..f2fc7e7 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {subl.*60}
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movaps.*32}
 
 
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 04bb725..91777f7 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -mcpu=penryn | FileCheck %s
 
 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
@@ -8,9 +8,12 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
 
 define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
 entry:
+; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: pcmpgt
-; CHECK: blendvps
+; CHECK: movzwl
+; CHECK: movzwl
+; CHECK: pshufd
+; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
   %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
   %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f0158d6..bddd535 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,15 +1,15 @@
 ; RUN: llc < %s -march=x86-64
 
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 
 define <2 x i64> @footz(<2 x i64> %a) nounwind {
-  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a)
+  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
 }
 define <2 x i64> @foolz(<2 x i64> %a) nounwind {
-  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a)
+  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
 }
 define <2 x i64> @foopop(<2 x i64> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index f487654..42d7f27 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -2,7 +2,7 @@
 ; RUN: not grep extractps   %t
 ; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
-; RUN: grep movss   %t | count 2
+; RUN: not grep movss   %t
 
 define void @t1(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
new file mode 100644
index 0000000..05b263e
--- /dev/null
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+
+; PR11674
+define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
+entry:
+; TODO: We should be able to generate cvtps2pd for the load.
+; For now, just check that we generate something sane.
+; CHECK: cvtss2sd
+; CHECK: cvtss2sd
+  %0 = load <2 x float>* %in, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  store <2 x double> %1, <2 x double>* %out, align 1
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 676be9b..2cf5dc6 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,12 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pxor %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: not grep shuf %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
+; Without any typed operations, always use the smaller xorps.
+; CHECK: test
+; CHECK: xorps
 define <2 x double> @test() {
 	ret <2 x double> zeroinitializer
 }
 
+; Prefer a constant pool load here.
+; CHECK: test2
+; CHECK-NOT: shuf
+; CHECK: movaps {{.*}}CPI
 define <4 x i32> @test2() nounwind  {
 	ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 >
 }
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index fc06b95..b6b8ba6 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 05a3a1e..2468735 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -5,7 +5,7 @@ define i32 @t() nounwind {
 entry:
 	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
 	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
 	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp, <4 x i32>* %b
 	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 1b104de..d038daf 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -5,7 +5,7 @@ entry:
 ; CHECK: punpckldq
 	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
 	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
 	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp, <4 x i32>* %b
 	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 950040a..430aa04 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -4,10 +4,10 @@
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
-; CHECK: movaps  ({{%rdi|%rcx}}), %xmm0
-; CHECK: movaps  %xmm0, %xmm1
-; CHECK-NEXT: movss   %xmm2, %xmm1
-; CHECK-NEXT: shufps  $36, %xmm1, %xmm0
+; CHECK: movaps  ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
+; CHECK: movaps  %[[XMM0]], %[[XMM1:xmm[0-9]+]]
+; CHECK-NEXT: movss   %xmm{{[0-9]+}}, %[[XMM1]]
+; CHECK-NEXT: shufps  $36, %[[XMM1]], %[[XMM0]]
   %0 = load <4 x i32>* undef, align 16
   %1 = load <4 x i32>* %a0, align 16
   %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -26,10 +26,12 @@ entry:
 
 define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
 entry:
-; CHECK: movaps  32({{%rdi|%rcx}}), %xmm0
-; CHECK-NEXT: movaps  48({{%rdi|%rcx}}), %xmm1
-; CHECK-NEXT: movss   %xmm1, %xmm0
-; CHECK-NEXT: movq    %xmm0, ({{%rsi|%rdx}}) 
+; CHECK: t02
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
   %0 = bitcast <8 x i32>* %source to <4 x i32>*
   %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
   %tmp2 = load <4 x i32>* %arrayidx, align 16
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 69a2ede..96ef883 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -46,10 +46,9 @@ entry:
 
 ; rdar://10119696
 ; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
 entry:
-  ; CHECK: movsd  (%
-  ; CHECK-NEXT: movsd  %xmm
+  ; CHECK: movlps  (%{{rdi|rdx}}), %xmm0
   %u110.i = load double* %y, align 1
   %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
   %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
@@ -57,3 +56,22 @@ entry:
   ret <4 x float> %shuffle.i
 }
 
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+  %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+  %idx.ext = sext i32 %s to i64
+  %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+  %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+  %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+  %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+  %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+  %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+  %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x float> %shuffle1.i5
+}
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
new file mode 100644
index 0000000..55531e3
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; rdar://10050222, rdar://10134392
+
+define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %p.val = load <1 x i64>* %p, align 1
+  %0 = bitcast <1 x i64> %p.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %shuffle1.i
+}
+
+define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1a:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = load double* %0
+  %2 = insertelement <2 x double> undef, double %1, i32 0
+  %3 = bitcast <2 x double> %2 to <4 x float>
+  %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %4
+}
+
+define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %cast.i = bitcast <4 x float> %a to <2 x i64>
+  %extract.i = extractelement <2 x i64> %cast.i, i32 0
+  %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
+  store i64 %extract.i, i64* %0, align 8
+  ret void
+}
+
+define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2a:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = bitcast <4 x float> %a to <2 x double>
+  %2 = extractelement <2 x double> %1, i32 0
+  store double %2, double* %0
+  ret void
+}
+
+; rdar://10436044
+define <2 x double> @t3() nounwind readonly {
+bb:
+; CHECK: t3:
+; CHECK: punpcklqdq %xmm1, %xmm0
+; CHECK: movq (%rax), %xmm1
+; CHECK: movsd %xmm1, %xmm0
+  %tmp0 = load i128* null, align 1
+  %tmp1 = load <2 x i32>* undef, align 8
+  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+  %tmp3 = bitcast <2 x i32> %tmp1 to i64
+  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
+  %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
+  %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %tmp7
+}
+
+; rdar://10450317
+define <2 x i64> @t4() nounwind readonly {
+bb:
+; CHECK: t4:
+; CHECK: punpcklqdq %xmm0, %xmm1
+; CHECK: movq (%rax), %xmm0
+; CHECK: movsd %xmm1, %xmm0
+  %tmp0 = load i128* null, align 1
+  %tmp1 = load <2 x i32>* undef, align 8
+  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+  %tmp3 = bitcast <2 x i32> %tmp1 to i64
+  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
+  %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %tmp6
+}
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 2a48de2..6599598 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq    %t | count 1
-; RUN: grep pshufd  %t | count 1
-; RUN: grep movupd  %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 | FileCheck %s
 
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
 	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v2sd
+; CHECK: movups	8(%esp)
+; CHECK: movaps
 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
 	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
 	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
diff --git a/test/CodeGen/X86/vec_udiv_to_shift.ll b/test/CodeGen/X86/vec_udiv_to_shift.ll
new file mode 100644
index 0000000..6edfcc0
--- /dev/null
+++ b/test/CodeGen/X86/vec_udiv_to_shift.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <8 x i16> @udiv_vec8x16(<8 x i16> %var) {
+entry:
+; CHECK: lshr <8 x i16> %var, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+%0 = udiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ret <8 x i16> %0
+}
+
+define <4 x i32> @udiv_vec4x32(<4 x i32> %var) {
+entry:
+; CHECK: lshr <4 x i32> %var, <i32 4, i32 4, i32 4, i32 4>
+%0 = udiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
+ret <4 x i32> %0
+}
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 4d1f056..682a0df 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
+; CHECK: foo
 ; CHECK: xorps
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
@@ -8,6 +9,7 @@ define void @foo(<4 x float>* %P) {
         ret void
 }
 
+; CHECK: bar
 ; CHECK: pxor
 define void @bar(<4 x i32>* %P) {
         %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
@@ -16,3 +18,13 @@ define void @bar(<4 x i32>* %P) {
         ret void
 }
 
+; Without any type hints from operations, we fall back to the smaller xorps.
+; The IR type <4 x i32> is ignored.
+; CHECK: untyped_zero
+; CHECK: xorps
+; CHECK: movaps
+define void @untyped_zero(<4 x i32>* %p) {
+entry:
+  store <4 x i32> zeroinitializer, <4 x i32>* %p, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 8aa5094..41ea024 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
 ; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
 ; 64-bit stores here do not use MMX.
 
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
new file mode 100644
index 0000000..3476e36
--- /dev/null
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: AGEP0:
+define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
+entry:
+  %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
+  %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
+  %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
+  %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+;CHECK: pslld $2
+;CHECK: padd
+  %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
+  ret <4 x i32*> %A3
+;CHECK: ret
+}
+
+;CHECK: AGEP1:
+define i32 @AGEP1(<4 x i32*> %param) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  %k = extractelement <4 x i32*> %A2, i32 3
+  %v = load i32* %k
+  ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP2:
+define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+  %k = extractelement <4 x i32*> %A2, i32 3
+  %v = load i32* %k
+  ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP3:
+define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+  %v = alloca i32
+  %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
+  ret <4 x i32*> %k
+;CHECK: ret
+}
+
+;CHECK: AGEP4:
+define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
+entry:
+; Multiply offset by two (add it to itself).
+;CHECK: padd
+; add the base to the offset
+;CHECK: padd
+  %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+  ret <4 x i16*> %A
+;CHECK: ret
+}
+
+;CHECK: AGEP5:
+define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
+entry:
+;CHECK: paddd
+  %A = getelementptr <4 x i8*> %param, <4 x i8> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
+
+; The size of each element is 1 byte. No need to multiply by element size.
+;CHECK: AGEP6:
+define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-NOT: pslld
+  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll
new file mode 100644
index 0000000..d47df90
--- /dev/null
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+define i64 @__builtin_ia32_vec_ext_v2di(<2 x i64> %a, i32 %i) nounwind {
+  %1 = alloca <2 x i64>, align 16
+  %2 = alloca i32, align 4
+  store <2 x i64> %a, <2 x i64>* %1, align 16
+  store i32 %i, i32* %2, align 4
+  %3 = load <2 x i64>* %1, align 16
+  %4 = load i32* %2, align 4
+  %5 = extractelement <2 x i64> %3, i32 %4
+  ret i64 %5
+}
+
+define <2 x i64> @__builtin_ia32_vec_int_v2di(<2 x i64> %a, i32 %i) nounwind {
+  %1 = alloca <2 x i64>, align 16
+  %2 = alloca i32, align 4
+  store <2 x i64> %a, <2 x i64>* %1, align 16
+  store i32 %i, i32* %2, align 4
+  %3 = load <2 x i64>* %1, align 16
+  %4 = load i32* %2, align 4
+  %5 = insertelement <2 x i64> %3, i64 1, i32 %4
+  ret <2 x i64> %5
+}
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index 2e5742a..1a82014 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
 @x = external global double
 
 define void @foo() nounwind  {
-  %a = volatile load double* @x
-  volatile store double 0.0, double* @x
-  volatile store double 0.0, double* @x
-  %b = volatile load double* @x
+  %a = load volatile double* @x
+  store volatile double 0.0, double* @x
+  store volatile double 0.0, double* @x
+  %b = load volatile double* @x
   ret void
 }
 
 define void @bar() nounwind  {
-  %c = volatile load double* @x
+  %c = load volatile double* @x
   ret void
 }
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index 97dacfd..ee98806 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 |  FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux |  FileCheck %s
 
-
-define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
-; CHECK: andb
+define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
+; CHECK: t0
+; CHECK: pand
+; CHECK: ret
   %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
   %t1 = and <2 x i1> %cmp1, %cmp2
@@ -12,7 +13,9 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado
 }
 
 define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK: t2
+; CHECK-NOT: pand
+; CHECK: ret
   %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
   %t1 = and <3 x i1> %cmp1, %cmp2
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 4b8016d..661cde8 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
-
-; Widen a v3i8 to v16i8 to use a vector add
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 |  FileCheck %s
 
 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
 ; CHECK-NOT: pextrw
-; CHECK: paddb
-; CHECK: pextrb
+; CHECK: add
+
 	%dst.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
 	%src.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
 	%n.addr = alloca i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index 03b3fea..d35abc3 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: paddb
+; CHECK: padd
 ; CHECK: pand
 
 ; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 0574923..f55b184 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: paddw
-; CHECK: pextrw
-; CHECK: movd
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
+; CHECK: incl
+; CHECK: incl
+; CHECK: incl
+; CHECK: addl
 
 ; Widen a v3i16 to v8i16 to do a vector add
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 1eace9e..4330aae 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
-; CHECK: paddw
+; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: movd
 
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 8e1adf5..5ea5426 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,16 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-
-; v8i8 that is widen to v16i8 then split
-; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
-; Unfortunately, we don't split the store so we don't get the code we want.
+; CHECK: psraw
+; CHECK: psraw
 
 define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index f6810cd..51f1c88 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,6 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: pshufd
-; CHECK: paddd
+; CHECK: paddq
 
 ; truncate v2i64 to v2i32
 
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 80f3a49..affd796 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK-NOT: cvtsi2ss
 
 ; unsigned to float v7i16 to v7f32
 
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index c91627c..4aeec91 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
 ; PR4891
 
 ; Both loads should happen before either store.
 
-; CHECK: movl  (%rdi), %[[R1:...]]
-; CHECK: movl  (%rsi), %[[R2:...]]
-; CHECK: movl  %[[R2]], (%rdi)
-; CHECK: movl  %[[R1]], (%rsi)
+; CHECK: movd  ({{.*}}), {{.*}}
+; CHECK: movd  ({{.*}}), {{.*}}
+; CHECK: movd  {{.*}}, ({{.*}})
+; CHECK: movd  {{.*}}, ({{.*}})
 
-; WIN64: movl  (%rcx), %[[R1:...]]
-; WIN64: movl  (%rdx), %[[R2:...]]
-; WIN64: movl  %[[R2]], (%rcx)
-; WIN64: movl  %[[R1]], (%rdx)
+; WIN64: movd  ({{.*}}), {{.*}}
+; WIN64: movd  ({{.*}}), {{.*}}
+; WIN64: movd  {{.*}}, ({{.*}})
+; WIN64: movd  {{.*}}, ({{.*}})
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index 639617f..9705d14 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,5 +1,6 @@
 ; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; PR4891
+; PR5626
 
 ; This load should be before the call, not after.
 
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 6422063..79aa000 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
 
 %i32vec3 = type <3 x i32>
+; CHECK: add3i32
 define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 ; CHECK: movdqa
 ; CHECK: paddd
@@ -16,6 +17,7 @@ define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 	ret void
 }
 
+; CHECK: add3i32_2
 define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 ; CHECK: movq
 ; CHECK: pinsrd
@@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 }
 
 %i32vec7 = type <7 x i32>
+; CHECK: add7i32
 define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
 ; CHECK: movdqa
 ; CHECK: movdqa
@@ -47,6 +50,7 @@ define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
 	ret void
 }
 
+; CHECK: add12i32
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 ; CHECK: movdqa
@@ -66,12 +70,14 @@ define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 }
 
 
+; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: movd
-; CHECK: pextrw
+; CHECK: add3i16
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+; CHECK: ret
 	%a = load %i16vec3* %ap, align 16
 	%b = load %i16vec3* %bp, align 16
 	%x = add %i16vec3 %a, %b
@@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 	ret void
 }
 
+; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
+; CHECK: add4i16
+; CHECK: paddd
 ; CHECK: movq
 	%a = load %i16vec4* %ap, align 16
 	%b = load %i16vec4* %bp, align 16
@@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
 	ret void
 }
 
+; CHECK: add12i16
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
 ; CHECK: movdqa
@@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
 	ret void
 }
 
+; CHECK: add18i16
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
 ; CHECK: movdqa
@@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
 }
 
 
+; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: movb
+; CHECK: addb
+; CHECK: addb
+; CHECK: addb
+; CHECK: ret
 	%a = load %i8vec3* %ap, align 16
 	%b = load %i8vec3* %bp, align 16
 	%x = add %i8vec3 %a, %b
@@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
 	ret void
 }
 
+; CHECK: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
 ; CHECK: movdqa
@@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 ; CHECK: movq
 ; CHECK: pextrb
 ; CHECK: pextrw
+; CHECK: ret
 	%a = load %i8vec31* %ap, align 16
 	%b = load %i8vec31* %bp, align 16
 	%x = add %i8vec31 %a, %b
@@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 }
 
 
+; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
 define %i8vec3pack  @rot() nounwind {
-; CHECK: shrb
+; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
 entry:
   %X = alloca %i8vec3pack, align 4
   %rot = alloca %i8vec3pack, align 4
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 8e951b7..7bebb27 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -10,6 +10,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 
@@ -23,6 +24,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 ; Example of when widening a v3float operation causes the DAG to replace a node
@@ -31,7 +33,7 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,12 +47,23 @@ entry:
   %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
   ret void
+; CHECK: ret
 }
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
 define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK: shuf4:
-; CHECK: punpckldq
+; CHECK-NOT: punpckldq
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
+; CHECK: ret
+}
+
+; PR11389: another CONCAT_VECTORS case
+define void @shuf5(<8 x i8>* %p) nounwind {
+; CHECK: shuf5:
+  %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  store <8 x i8> %v, <8 x i8>* %p, align 8
+  ret void
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
new file mode 100644
index 0000000..878c6db
--- /dev/null
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
+; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
+; arguments are caller-cleanup like normal arguments.
+
+define void @sret1(i8* sret) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  ret void
+}
+
+define void @sret2(i32* sret %x, i32 %y) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  store i32 %y, i32* %x
+  ret void
+}
+
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index e39d007..a961c6a 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index efe8bca..52bc509 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s
 
 ; Verify that the var arg parameters which are passed in registers are stored
 ; in home stack slots allocated by the caller and that AP is correctly
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
new file mode 100644
index 0000000..596b426
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
+; function has a nonstandard calling convention: the input value is expected on
+; the x87 stack instead of the callstack. The input value is popped by the
+; callee. Mingw32 uses normal cdecl compiler-rt functions.
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui double %x to i64
+  ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui float %x to i64
+  ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %3, %4
+  ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %4, %3
+  ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+  %1 = fptoui double %x to i64
+  %2 = fptoui double %y to i64
+  %3 = sub i64 %1, %2
+  %4 = insertvalue {double, i64} undef, double %x, 0
+  %5 = insertvalue {double, i64} %4, i64 %3, 1
+  ret {double, i64} %5
+}
+
+define i32 @double_ui32_5(double %X) {
+; FTOL: @double_ui32_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i32
+  ret i32 %tmp.1
+}
+
+define i64 @double_ui64_5(double %X) {
+; FTOL: @double_ui64_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i64
+  ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index fdf68f9..20bccab 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,9 @@
 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
 entry:
 ; CHECK:      shl4
+; CHECK:      padd
 ; CHECK:      pslld
-; CHECK-NEXT: pslld
+; CHECK:      ret
   %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -19,6 +20,7 @@ entry:
 ; CHECK:      shr4
 ; CHECK:      psrld
 ; CHECK-NEXT: psrld
+; CHECK:      ret
   %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -30,6 +32,7 @@ entry:
 ; CHECK:      sra4
 ; CHECK:      psrad
 ; CHECK-NEXT: psrad
+; CHECK:      ret
   %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -41,6 +44,7 @@ entry:
 ; CHECK:      shl2
 ; CHECK:      psllq
 ; CHECK-NEXT: psllq
+; CHECK:      ret
   %B = shl <2 x i64> %A,  < i64 2, i64 2>
   %C = shl <2 x i64> %A,  < i64 9, i64 9>
   %K = xor <2 x i64> %B, %C
@@ -52,6 +56,7 @@ entry:
 ; CHECK:      shr2
 ; CHECK:      psrlq
 ; CHECK-NEXT: psrlq
+; CHECK:      ret
   %B = lshr <2 x i64> %A,  < i64 8, i64 8>
   %C = lshr <2 x i64> %A,  < i64 1, i64 1>
   %K = xor <2 x i64> %B, %C
@@ -62,8 +67,9 @@ entry:
 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
 entry:
 ; CHECK:      shl8
+; CHECK:      padd
 ; CHECK:      psllw
-; CHECK-NEXT: psllw
+; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -75,6 +81,7 @@ entry:
 ; CHECK:      shr8
 ; CHECK:      psrlw
 ; CHECK-NEXT: psrlw
+; CHECK:      ret
   %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -86,6 +93,7 @@ entry:
 ; CHECK:      sra8
 ; CHECK:      psraw
 ; CHECK-NEXT: psraw
+; CHECK:      ret
   %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -100,6 +108,7 @@ entry:
 ; CHECK: sll8_nosplat
 ; CHECK-NOT: psll
 ; CHECK-NOT: psll
+; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -112,6 +121,7 @@ entry:
 ; CHECK: shr2_nosplat
 ; CHECK-NOT:  psrlq
 ; CHECK-NOT:  psrlq
+; CHECK:      ret
   %B = lshr <2 x i64> %A,  < i64 8, i64 1>
   %C = lshr <2 x i64> %A,  < i64 1, i64 0>
   %K = xor <2 x i64> %B, %C
@@ -124,7 +134,8 @@ entry:
 define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shl2_other
-; CHECK-not:      psllq
+; CHECK: psllq
+; CHECK: ret
   %B = shl <2 x i32> %A,  < i32 2, i32 2>
   %C = shl <2 x i32> %A,  < i32 9, i32 9>
   %K = xor <2 x i32> %B, %C
@@ -134,9 +145,48 @@ entry:
 define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shr2_other
-; CHECK-NOT:      psrlq
+; CHECK: psrlq
+; CHECK: ret
   %B = lshr <2 x i32> %A,  < i32 8, i32 8>
   %C = lshr <2 x i32> %A,  < i32 1, i32 1>
   %K = xor <2 x i32> %B, %C
   ret <2 x i32> %K
 }
+
+define <16 x i8> @shl9(<16 x i8> %A) nounwind {
+  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: shl9:
+; CHECK: psllw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @shr9(<16 x i8> %A) nounwind {
+  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: shr9:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
+  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %B
+; CHECK: sra_v16i8_7:
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
+  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: sra_v16i8:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 0000000..a2521b0
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: vpermil2pd
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a2
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a1
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a2
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: vpermil2ps
+  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: vpermil2ps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpcmov
+  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %vec = load <4 x i64>* %a1
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK:vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK:vpcomb
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+  ; CHECK: vphaddbd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+  ; CHECK: vphaddbq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+  ; CHECK: vphaddbw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+  ; CHECK: vphadddq
+  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+  ; CHECK: vphaddubd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+  ; CHECK: vphaddubq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+  ; CHECK: vphaddubw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+  ; CHECK: vphaddudq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+  ; CHECK: vphadduwd
+  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+  ; CHECK: vphadduwq
+  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+  ; CHECK: vphaddwd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+  ; CHECK: vphaddwq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+  ; CHECK: vphsubbw
+  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+  ; CHECK: vphsubdq
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubdq
+  %vec = load <4 x i32>* %a0
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+  ; CHECK: vphsubwd
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubwd
+  %vec = load <8 x i16>* %a0
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacssdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacssww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacsww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpmadcswd
+  %vec = load <8 x i16>* %a1
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  ; CHECK: vpperm
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a2
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vprotb
+  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vprotd
+  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vprotq
+  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vprotw
+  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshab
+  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshad
+  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshaq
+  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshaw
+  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshlb
+  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshld
+  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshlq
+  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshlw
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a1
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a0
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %elem = load float* %a1
+  %vec = insertelement <4 x float> undef, float %elem, i32 0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %elem = load double* %a1
+  %vec = insertelement <2 x double> undef, double %elem, i32 0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
+  ; CHECK: vfrczpd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  %vec = load <2 x double>* %a0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a0
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
+  ; CHECK: vfrczps
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  %vec = load <4 x float>* %a0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
+  ret <8 x float> %res
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %vec = load <8 x float>* %a0
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 178c59d..ddc4cab 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1() nounwind {
 	ret <4 x i32> %tmp
         
 ; X32: test1:
-; X32:	pxor	%xmm0, %xmm0
+; X32:	xorps	%xmm0, %xmm0
 ; X32:	ret
 }
 
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 4470074..4242530 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -16,9 +16,9 @@ define double @foo() nounwind {
 ;CHECK-32: ret
 
 ;CHECK-64: foo:
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: ret
 }
 
@@ -33,8 +33,8 @@ define float @foof() nounwind {
 ;CHECK-32: ret
 
 ;CHECK-64: foof:
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: ret
 }
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index b3f5cdb..ff93c68 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -34,7 +34,7 @@ define void @test3(i8 %x) nounwind readnone {
   ret void
 }
 ; CHECK: test3
-; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
 ; CHECK-NEXT: andl $224, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index cea9e9c..6432ae3 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; <rdar://problem/8006248>
 
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 7d6d7ba..84e21e4 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -16,5 +16,5 @@ allocas:
 ; CHECK: f:
 ; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
 ; CHECK: set sp, [[REGISTER]]
-; CHECK extsp 1
-; CHECK bl g
+; CHECK: extsp 1
+; CHECK: bl g
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
deleted file mode 100644
index 8211f85..0000000
--- a/test/CodeGen/XCore/cos.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl cosf" %t1.s | count 1
-; RUN: grep "bl cos" %t1.s | count 2
-declare double @llvm.cos.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.cos.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.cos.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.cos.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/dg.exp b/test/CodeGen/XCore/dg.exp
deleted file mode 100644
index 7110eab..0000000
--- a/test/CodeGen/XCore/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target XCore] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
deleted file mode 100644
index d23d484..0000000
--- a/test/CodeGen/XCore/exp.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl expf" %t1.s | count 1
-; RUN: grep "bl exp" %t1.s | count 2
-declare double @llvm.exp.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.exp.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.exp.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.exp.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
deleted file mode 100644
index 4c4d17f4..0000000
--- a/test/CodeGen/XCore/exp2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl exp2f" %t1.s | count 1
-; RUN: grep "bl exp2" %t1.s | count 2
-declare double @llvm.exp2.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.exp2.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.exp2.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.exp2.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/float-intrinsics.ll b/test/CodeGen/XCore/float-intrinsics.ll
new file mode 100644
index 0000000..69a40f3
--- /dev/null
+++ b/test/CodeGen/XCore/float-intrinsics.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare double @llvm.cos.f64(double)
+declare double @llvm.exp.f64(double)
+declare double @llvm.exp2.f64(double)
+declare double @llvm.log.f64(double)
+declare double @llvm.log10.f64(double)
+declare double @llvm.log2.f64(double)
+declare double @llvm.pow.f64(double, double)
+declare double @llvm.powi.f64(double, i32)
+declare double @llvm.sin.f64(double)
+declare double @llvm.sqrt.f64(double)
+
+define double @cos(double %F) {
+; CHECK: cos:
+; CHECK: bl cos
+        %result = call double @llvm.cos.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.cos.f32(float)
+
+; CHECK: cosf:
+; CHECK: bl cosf
+define float @cosf(float %F) {
+        %result = call float @llvm.cos.f32(float %F)
+	ret float %result
+}
+
+define double @exp(double %F) {
+; CHECK: exp:
+; CHECK: bl exp
+        %result = call double @llvm.exp.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.exp.f32(float)
+
+define float @expf(float %F) {
+; CHECK: expf:
+; CHECK: bl expf
+        %result = call float @llvm.exp.f32(float %F)
+	ret float %result
+}
+
+define double @exp2(double %F) {
+; CHECK: exp2:
+; CHECK: bl exp2
+        %result = call double @llvm.exp2.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.exp2.f32(float)
+
+define float @exp2f(float %F) {
+; CHECK: exp2f:
+; CHECK: bl exp2f
+        %result = call float @llvm.exp2.f32(float %F)
+	ret float %result
+}
+
+define double @log(double %F) {
+; CHECK: log:
+; CHECK: bl log
+        %result = call double @llvm.log.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log.f32(float)
+
+define float @logf(float %F) {
+; CHECK: logf:
+; CHECK: bl logf
+        %result = call float @llvm.log.f32(float %F)
+	ret float %result
+}
+
+define double @log10(double %F) {
+; CHECK: log10:
+; CHECK: bl log10
+        %result = call double @llvm.log10.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log10.f32(float)
+
+define float @log10f(float %F) {
+; CHECK: log10f:
+; CHECK: bl log10f
+        %result = call float @llvm.log10.f32(float %F)
+	ret float %result
+}
+
+define double @log2(double %F) {
+; CHECK: log2:
+; CHECK: bl log2
+        %result = call double @llvm.log2.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log2.f32(float)
+
+define float @log2f(float %F) {
+; CHECK: log2f:
+; CHECK: bl log2f
+        %result = call float @llvm.log2.f32(float %F)
+	ret float %result
+}
+
+define double @pow(double %F, double %power) {
+; CHECK: pow:
+; CHECK: bl pow
+        %result = call double @llvm.pow.f64(double %F, double %power)
+	ret double %result
+}
+
+declare float @llvm.pow.f32(float, float)
+
+define float @powf(float %F, float %power) {
+; CHECK: powf:
+; CHECK: bl powf
+        %result = call float @llvm.pow.f32(float %F, float %power)
+	ret float %result
+}
+
+define double @powi(double %F, i32 %power) {
+; CHECK: powi:
+; CHECK: bl __powidf2
+        %result = call double @llvm.powi.f64(double %F, i32 %power)
+	ret double %result
+}
+
+declare float @llvm.powi.f32(float, i32)
+
+define float @powif(float %F, i32 %power) {
+; CHECK: powif:
+; CHECK: bl __powisf2
+        %result = call float @llvm.powi.f32(float %F, i32 %power)
+	ret float %result
+}
+
+define double @sin(double %F) {
+; CHECK: sin:
+; CHECK: bl sin
+        %result = call double @llvm.sin.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.sin.f32(float)
+
+define float @sinf(float %F) {
+; CHECK: sinf:
+; CHECK: bl sinf
+        %result = call float @llvm.sin.f32(float %F)
+	ret float %result
+}
+
+define double @sqrt(double %F) {
+; CHECK: sqrt:
+; CHECK: bl sqrt
+        %result = call double @llvm.sqrt.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define float @sqrtf(float %F) {
+; CHECK: sqrtf:
+; CHECK: bl sqrtf
+        %result = call float @llvm.sqrt.f32(float %F)
+	ret float %result
+}
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index e3dd3dd..d442a19 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=xcore | grep "xor" | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 define i1 @test(double %F) nounwind {
 entry:
+; CHECK: test:
+; CHECK: xor
 	%0 = fsub double -0.000000e+00, %F
 	%1 = fcmp olt double 0.000000e+00, %0
 	ret i1 %1
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index ecab65c..ec46071 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "get r11, id" %t1.s | count 1 
+; RUN: llc < %s -march=xcore | FileCheck %s
 declare i32 @llvm.xcore.getid()
 
 define i32 @test() {
+; CHECK: test:
+; CHECK: get r11, id
+; CHECK-NEXT: mov r0, r11
 	%result = call i32 @llvm.xcore.getid()
 	ret i32 %result
 }
diff --git a/test/CodeGen/XCore/global_negative_offset.ll b/test/CodeGen/XCore/global_negative_offset.ll
new file mode 100644
index 0000000..0328fb0
--- /dev/null
+++ b/test/CodeGen/XCore/global_negative_offset.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; Don't fold negative offsets into cp / dp accesses to avoid a relocation
+; error if the address + addend is less than the start of the cp / dp.
+
+@a = external constant [0 x i32], section ".cp.rodata"
+@b = external global [0 x i32]
+
+define i32 *@f() nounwind {
+entry:
+; CHECK: f:
+; CHECK: ldaw r11, cp[a]
+; CHECK: sub r0, r11, 4
+	%0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
+	ret i32* %0
+}
+
+define i32 *@g() nounwind {
+entry:
+; CHECK: g:
+; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
+; CHECK: sub r0, [[REG]], 4
+	%0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
+	ret i32* %0
+}
diff --git a/test/CodeGen/XCore/ladd_lsub_combine.ll b/test/CodeGen/XCore/ladd_lsub_combine.ll
index a693ee2..cd89966 100644
--- a/test/CodeGen/XCore/ladd_lsub_combine.ll
+++ b/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s
+; RUN: llc -march=xcore < %s | FileCheck %s
 
 ; Only needs one ladd
 define i64 @f1(i32 %x, i32 %y) nounwind {
diff --git a/test/CodeGen/XCore/licm-ldwcp.ll b/test/CodeGen/XCore/licm-ldwcp.ll
index 4884f70..794c6bb 100644
--- a/test/CodeGen/XCore/licm-ldwcp.ll
+++ b/test/CodeGen/XCore/licm-ldwcp.ll
@@ -13,6 +13,6 @@ entry:
   br label %bb
 
 bb:                                               ; preds = %bb, %entry
-  volatile store i32 525509670, i32* %p, align 4
+  store volatile i32 525509670, i32* %p, align 4
   br label %bb
 }
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
new file mode 100644
index 0000000..f8726af
--- /dev/null
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
index adfea21..faff03b 100644
--- a/test/CodeGen/XCore/load.ll
+++ b/test/CodeGen/XCore/load.ll
@@ -1,15 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: not grep zext %t1.s
-; RUN: not grep sext %t1.s
-; RUN: grep "ldw" %t1.s | count 2
-; RUN: grep "ld16s" %t1.s | count 1
-; RUN: grep "ld8u" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define i32 @load32(i32* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load32:
+; CHECK: ldw r0, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	%1 = load i32* %0, align 4
 	ret i32 %1
@@ -17,6 +11,8 @@ entry:
 
 define i32 @load32_imm(i32* %p) nounwind {
 entry:
+; CHECK: load32_imm:
+; CHECK: ldw r0, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	%1 = load i32* %0, align 4
 	ret i32 %1
@@ -24,6 +20,9 @@ entry:
 
 define i32 @load16(i16* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load16:
+; CHECK: ld16s r0, r0[r1]
+; CHECK-NOT: sext
 	%0 = getelementptr i16* %p, i32 %offset
 	%1 = load i16* %0, align 2
 	%2 = sext i16 %1 to i32
@@ -32,6 +31,9 @@ entry:
 
 define i32 @load8(i8* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load8:
+; CHECK: ld8u r0, r0[r1]
+; CHECK-NOT: zext
 	%0 = getelementptr i8* %p, i32 %offset
 	%1 = load i8* %0, align 1
 	%2 = zext i8 %1 to i32
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
deleted file mode 100644
index a08471f..0000000
--- a/test/CodeGen/XCore/log.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl logf" %t1.s | count 1
-; RUN: grep "bl log" %t1.s | count 2
-declare double @llvm.log.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
deleted file mode 100644
index a72b8bf..0000000
--- a/test/CodeGen/XCore/log10.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log10f" %t1.s | count 1
-; RUN: grep "bl log10" %t1.s | count 2
-declare double @llvm.log10.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log10.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log10.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log10.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
deleted file mode 100644
index d257433..0000000
--- a/test/CodeGen/XCore/log2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log2f" %t1.s | count 1
-; RUN: grep "bl log2" %t1.s | count 2
-declare double @llvm.log2.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log2.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log2.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log2.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
deleted file mode 100644
index b461185..0000000
--- a/test/CodeGen/XCore/pow.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl powf" %t1.s | count 1
-; RUN: grep "bl pow" %t1.s | count 2
-declare double @llvm.pow.f64(double, double)
-
-define double @test(double %F, double %power) {
-        %result = call double @llvm.pow.f64(double %F, double %power)
-	ret double %result
-}
-
-declare float @llvm.pow.f32(float, float)
-
-define float @testf(float %F, float %power) {
-        %result = call float @llvm.pow.f32(float %F, float %power)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
deleted file mode 100644
index de31cbe..0000000
--- a/test/CodeGen/XCore/powi.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __powidf2" %t1.s | count 1
-; RUN: grep "bl __powisf2" %t1.s | count 1
-declare double @llvm.powi.f64(double, i32)
-
-define double @test(double %F, i32 %power) {
-        %result = call double @llvm.powi.f64(double %F, i32 %power)
-	ret double %result
-}
-
-declare float @llvm.powi.f32(float, i32)
-
-define float @testf(float %F, i32 %power) {
-        %result = call float @llvm.powi.f32(float %F, i32 %power)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 537d63b..80b7db4 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,19 +1,21 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -march=xcore > %t
-; RUN: grep .Lfoo: %t
-; RUN: grep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep ldw.*\.Lbaz %t
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define private void @foo() {
+; CHECK: .Lfoo:
         ret void
 }
 
 @baz = private global i32 4
 
 define i32 @bar() {
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: ldw r0, dp[.Lbaz]
         call void @foo()
 	%1 = load i32* @baz, align 4
         ret i32 %1
 }
+
+; CHECK: .Lbaz:
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
index 3181e96..5b612d0 100644
--- a/test/CodeGen/XCore/scavenging.ll
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -18,32 +18,32 @@ entry:
 	%x = alloca [100 x i32], align 4		; <[100 x i32]*> [#uses=2]
 	%0 = load i32* @size, align 4		; <i32> [#uses=1]
 	%1 = alloca i32, i32 %0, align 4		; <i32*> [#uses=1]
-	%2 = volatile load i32* @g0, align 4		; <i32> [#uses=1]
-	%3 = volatile load i32* @g1, align 4		; <i32> [#uses=1]
-	%4 = volatile load i32* @g2, align 4		; <i32> [#uses=1]
-	%5 = volatile load i32* @g3, align 4		; <i32> [#uses=1]
-	%6 = volatile load i32* @g4, align 4		; <i32> [#uses=1]
-	%7 = volatile load i32* @g5, align 4		; <i32> [#uses=1]
-	%8 = volatile load i32* @g6, align 4		; <i32> [#uses=1]
-	%9 = volatile load i32* @g7, align 4		; <i32> [#uses=1]
-	%10 = volatile load i32* @g8, align 4		; <i32> [#uses=1]
-	%11 = volatile load i32* @g9, align 4		; <i32> [#uses=1]
-	%12 = volatile load i32* @g10, align 4		; <i32> [#uses=1]
-	%13 = volatile load i32* @g11, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32* @g0, align 4		; <i32> [#uses=1]
+	%3 = load volatile i32* @g1, align 4		; <i32> [#uses=1]
+	%4 = load volatile i32* @g2, align 4		; <i32> [#uses=1]
+	%5 = load volatile i32* @g3, align 4		; <i32> [#uses=1]
+	%6 = load volatile i32* @g4, align 4		; <i32> [#uses=1]
+	%7 = load volatile i32* @g5, align 4		; <i32> [#uses=1]
+	%8 = load volatile i32* @g6, align 4		; <i32> [#uses=1]
+	%9 = load volatile i32* @g7, align 4		; <i32> [#uses=1]
+	%10 = load volatile i32* @g8, align 4		; <i32> [#uses=1]
+	%11 = load volatile i32* @g9, align 4		; <i32> [#uses=1]
+	%12 = load volatile i32* @g10, align 4		; <i32> [#uses=1]
+	%13 = load volatile i32* @g11, align 4		; <i32> [#uses=2]
 	%14 = getelementptr [100 x i32]* %x, i32 0, i32 50		; <i32*> [#uses=1]
 	store i32 %13, i32* %14, align 4
-	volatile store i32 %13, i32* @g11, align 4
-	volatile store i32 %12, i32* @g10, align 4
-	volatile store i32 %11, i32* @g9, align 4
-	volatile store i32 %10, i32* @g8, align 4
-	volatile store i32 %9, i32* @g7, align 4
-	volatile store i32 %8, i32* @g6, align 4
-	volatile store i32 %7, i32* @g5, align 4
-	volatile store i32 %6, i32* @g4, align 4
-	volatile store i32 %5, i32* @g3, align 4
-	volatile store i32 %4, i32* @g2, align 4
-	volatile store i32 %3, i32* @g1, align 4
-	volatile store i32 %2, i32* @g0, align 4
+	store volatile i32 %13, i32* @g11, align 4
+	store volatile i32 %12, i32* @g10, align 4
+	store volatile i32 %11, i32* @g9, align 4
+	store volatile i32 %10, i32* @g8, align 4
+	store volatile i32 %9, i32* @g7, align 4
+	store volatile i32 %8, i32* @g6, align 4
+	store volatile i32 %7, i32* @g5, align 4
+	store volatile i32 %6, i32* @g4, align 4
+	store volatile i32 %5, i32* @g3, align 4
+	store volatile i32 %4, i32* @g2, align 4
+	store volatile i32 %3, i32* @g1, align 4
+	store volatile i32 %2, i32* @g0, align 4
 	%x1 = getelementptr [100 x i32]* %x, i32 0, i32 0		; <i32*> [#uses=1]
 	call void @g(i32* %x1, i32* %1) nounwind
 	ret void
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
deleted file mode 100644
index ced026f..0000000
--- a/test/CodeGen/XCore/sin.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sinf" %t1.s | count 1
-; RUN: grep "bl sin" %t1.s | count 2
-declare double @llvm.sin.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.sin.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.sin.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.sin.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
deleted file mode 100644
index 364d1a1..0000000
--- a/test/CodeGen/XCore/sqrt.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sqrtf" %t1.s | count 1
-; RUN: grep "bl sqrt" %t1.s | count 2
-declare double @llvm.sqrt.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.sqrt.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.sqrt.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.sqrt.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
index 2213743..836b125 100644
--- a/test/CodeGen/XCore/store.ll
+++ b/test/CodeGen/XCore/store.ll
@@ -1,13 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: grep "stw" %t1.s | count 2
-; RUN: grep "st16" %t1.s | count 1
-; RUN: grep "st8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
 entry:
+; CHECK: store32:
+; CHECK: stw r2, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	store i32 %val, i32* %0, align 4
 	ret void
@@ -15,6 +11,8 @@ entry:
 
 define void @store32_imm(i32* %p, i32 %val) nounwind {
 entry:
+; CHECK: store32_imm:
+; CHECK: stw r1, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	store i32 %val, i32* %0, align 4
 	ret void
@@ -22,6 +20,8 @@ entry:
 
 define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
 entry:
+; CHECK: store16:
+; CHECK: st16 r2, r0[r1]
 	%0 = getelementptr i16* %p, i32 %offset
 	store i16 %val, i16* %0, align 2
 	ret void
@@ -29,6 +29,8 @@ entry:
 
 define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
 entry:
+; CHECK: store8:
+; CHECK: st8 r2, r0[r1]
 	%0 = getelementptr i8* %p, i32 %offset
 	store i8 %val, i8* %0, align 1
 	ret void
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index 45f886d..eb71cb6 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "ecallf" %t1.s | count 1
-; RUN: grep "ldc" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 define i32 @test() noreturn nounwind  {
 entry:
+; CHECK: test:
+; CHECK: ldc
+; CHECK: ecallf
 	tail call void @llvm.trap( )
 	unreachable
 }
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
index 493ca6a..c997b78 100644
--- a/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl memmove" %t1.s | count 1
-; RUN: grep "ldc r., 8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 ; Unaligned load / store pair. Should be combined into a memmove
 ; of size 8
 define void @f(i64* %dst, i64* %src) nounwind {
 entry:
+; CHECK: f:
+; CHECK: ldc r2, 8
+; CHECK: bl memmove
 	%0 = load i64* %src, align 1
 	store i64 %0, i64* %dst, align 1
 	ret void