author    dim <dim@FreeBSD.org>  2010-09-17 15:48:55 +0000
committer dim <dim@FreeBSD.org>  2010-09-17 15:48:55 +0000
commit    5d5cc59cc77afe655b3707cb0e69e0827b444cad (patch)
tree      36453626c792cccd91f783a38a169d610a6b9db9 /test/CodeGen/X86
parent    786a18553586229ad99ecb5ecde8a9d914c45e27 (diff)
Vendor import of llvm r114020 (from the release_28 branch):
http://llvm.org/svn/llvm-project/llvm/branches/release_28@114020

Approved by: rpaulo (mentor)
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--  test/CodeGen/X86/2006-05-22-FPSetEQ.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-06-14-branchfold.ll | 133
-rw-r--r--  test/CodeGen/X86/2008-01-25-EmptyFunction.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-08-06-CmpStride.ll | 23
-rw-r--r--  test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll | 33
-rw-r--r--  test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-13-PHIElimBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-23-MultiUseSched.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-01-18-DbgValue.ll | 48
-rw-r--r--  test/CodeGen/X86/2010-02-01-DbgValueCrash.ll | 33
-rw-r--r--  test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 239
-rw-r--r--  test/CodeGen/X86/2010-05-28-Crash.ll | 44
-rw-r--r--  test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 53
-rw-r--r--  test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll | 28
-rw-r--r--  test/CodeGen/X86/2010-07-15-Crash.ll | 12
-rw-r--r--  test/CodeGen/X86/2010-07-29-SetccSimplify.ll | 14
-rw-r--r--  test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll | 36
-rw-r--r--  test/CodeGen/X86/2010-08-04-MingWCrash.ll | 39
-rw-r--r--  test/CodeGen/X86/2010-08-10-DbgConstant.ll | 25
-rw-r--r--  test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll | 28
-rw-r--r--  test/CodeGen/X86/GC/dg.exp | 4
-rw-r--r--  test/CodeGen/X86/MachineSink-PHIUse.ll | 39
-rw-r--r--  test/CodeGen/X86/avx-128.ll | 12
-rw-r--r--  test/CodeGen/X86/avx-256.ll | 15
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86.ll | 2587
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86_64.ll | 50
-rw-r--r--  test/CodeGen/X86/barrier-sse.ll | 21
-rw-r--r--  test/CodeGen/X86/barrier.ll | 7
-rw-r--r--  test/CodeGen/X86/call-imm.ll | 2
-rw-r--r--  test/CodeGen/X86/change-compare-stride-trickiness-0.ll | 29
-rw-r--r--  test/CodeGen/X86/change-compare-stride-trickiness-1.ll | 28
-rw-r--r--  test/CodeGen/X86/change-compare-stride-trickiness-2.ll | 58
-rw-r--r--  test/CodeGen/X86/constant-pool-remat-0.ll | 4
-rw-r--r--  test/CodeGen/X86/critical-edge-split.ll | 2
-rw-r--r--  test/CodeGen/X86/dllexport.ll | 2
-rw-r--r--  test/CodeGen/X86/dyn-stackalloc.ll | 6
-rw-r--r--  test/CodeGen/X86/empty-functions.ll | 15
-rw-r--r--  test/CodeGen/X86/fabs.ll | 2
-rw-r--r--  test/CodeGen/X86/fast-isel-atomic.ll | 17
-rw-r--r--  test/CodeGen/X86/fast-isel-cmp-branch.ll | 29
-rw-r--r--  test/CodeGen/X86/fast-isel-gep.ll | 4
-rw-r--r--  test/CodeGen/X86/fast-isel-shift-imm.ll | 2
-rw-r--r--  test/CodeGen/X86/force-align-stack.ll | 21
-rw-r--r--  test/CodeGen/X86/insert-positions.ll | 69
-rw-r--r--  test/CodeGen/X86/int-intrinsic.ll | 20
-rw-r--r--  test/CodeGen/X86/licm-nested.ll | 2
-rw-r--r--  test/CodeGen/X86/lock-inst-encoding.ll | 22
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce4.ll | 4
-rw-r--r--  test/CodeGen/X86/lsr-interesting-step.ll | 51
-rw-r--r--  test/CodeGen/X86/lsr-normalization.ll | 99
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll | 4
-rw-r--r--  test/CodeGen/X86/lsr-static-addr.ll | 31
-rw-r--r--  test/CodeGen/X86/lsr-wrap.ll | 2
-rw-r--r--  test/CodeGen/X86/narrow_op-2.ll | 25
-rw-r--r--  test/CodeGen/X86/phi-immediate-factoring.ll | 2
-rw-r--r--  test/CodeGen/X86/pr7882.ll | 17
-rw-r--r--  test/CodeGen/X86/shl-anyext.ll | 40
-rw-r--r--  test/CodeGen/X86/sibcall.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-minmax.ll | 4
-rw-r--r--  test/CodeGen/X86/sse1.ll | 45
-rw-r--r--  test/CodeGen/X86/sse2.ll | 168
-rw-r--r--  test/CodeGen/X86/sse41.ll | 25
-rw-r--r--  test/CodeGen/X86/stack-color-with-reg.ll | 361
-rw-r--r--  test/CodeGen/X86/stdcall.ll | 2
-rw-r--r--  test/CodeGen/X86/store-narrow.ll | 31
-rw-r--r--  test/CodeGen/X86/tailcall-fastisel.ll | 14
-rw-r--r--  test/CodeGen/X86/twoaddr-coalesce.ll | 2
-rw-r--r--  test/CodeGen/X86/v2f32.ll | 57
-rw-r--r--  test/CodeGen/X86/vec_cast.ll | 21
-rw-r--r--  test/CodeGen/X86/vec_insert-6.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert-9.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shift4.ll | 25
-rw-r--r--  test/CodeGen/X86/vec_shuffle-10.ll | 25
-rw-r--r--  test/CodeGen/X86/vec_shuffle-19.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-20.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-24.ll | 3
-rw-r--r--  test/CodeGen/X86/vec_shuffle-3.ll | 20
-rw-r--r--  test/CodeGen/X86/vec_shuffle-37.ll | 14
-rw-r--r--  test/CodeGen/X86/vec_shuffle-4.ll | 12
-rw-r--r--  test/CodeGen/X86/vec_shuffle-5.ll | 13
-rw-r--r--  test/CodeGen/X86/vec_shuffle-6.ll | 42
-rw-r--r--  test/CodeGen/X86/vec_shuffle-7.ll | 11
-rw-r--r--  test/CodeGen/X86/vec_shuffle-8.ll | 10
-rw-r--r--  test/CodeGen/X86/vec_shuffle-9.ll | 21
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll | 8
-rw-r--r--  test/CodeGen/X86/win_chkstk.ll | 45
-rw-r--r--  test/CodeGen/X86/zero-remat.ll | 2
88 files changed, 4389 insertions, 752 deletions
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 2d7bd27..35b0159 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 | grep setnp
-; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
; RUN: not grep setnp
define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
deleted file mode 100644
index 2680b15..0000000
--- a/test/CodeGen/X86/2007-06-14-branchfold.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
-; check that branch folding understands FP_REG_KILL is not a branch
-
-target triple = "i686-pc-linux-gnu"
- %struct.FRAME.c34003a = type { float, float }
-@report_E = global i8 0 ; <i8*> [#uses=0]
-
-define void @main() {
-entry:
- %FRAME.31 = alloca %struct.FRAME.c34003a, align 8 ; <%struct.FRAME.c34003a*> [#uses=4]
- %tmp20 = call i32 @report__ident_int( i32 -50 ) ; <i32> [#uses=1]
- %tmp2021 = sitofp i32 %tmp20 to float ; <float> [#uses=5]
- %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp23, %tmp26 ; <i1> [#uses=1]
- br i1 %bothcond, label %bb, label %bb30
-
-bb: ; preds = %entry
- unwind
-
-bb30: ; preds = %entry
- %tmp35 = call i32 @report__ident_int( i32 50 ) ; <i32> [#uses=1]
- %tmp3536 = sitofp i32 %tmp35 to float ; <float> [#uses=4]
- %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond226 = or i1 %tmp38, %tmp44 ; <i1> [#uses=1]
- br i1 %bothcond226, label %bb47, label %bb49
-
-bb47: ; preds = %bb30
- unwind
-
-bb49: ; preds = %bb30
- %tmp60 = fcmp ult float %tmp3536, %tmp2021 ; <i1> [#uses=1]
- %tmp60.not = xor i1 %tmp60, true ; <i1> [#uses=1]
- %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond227 = and i1 %tmp65, %tmp60.not ; <i1> [#uses=1]
- br i1 %bothcond227, label %cond_true68, label %cond_next70
-
-cond_true68: ; preds = %bb49
- unwind
-
-cond_next70: ; preds = %bb49
- %tmp71 = call i32 @report__ident_int( i32 -30 ) ; <i32> [#uses=1]
- %tmp7172 = sitofp i32 %tmp71 to float ; <float> [#uses=3]
- %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond228 = or i1 %tmp74, %tmp80 ; <i1> [#uses=1]
- br i1 %bothcond228, label %bb83, label %bb85
-
-bb83: ; preds = %cond_next70
- unwind
-
-bb85: ; preds = %cond_next70
- %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1 ; <float*> [#uses=3]
- store float %tmp7172, float* %tmp90
- %tmp92 = call i32 @report__ident_int( i32 30 ) ; <i32> [#uses=1]
- %tmp9293 = sitofp i32 %tmp92 to float ; <float> [#uses=7]
- %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond229 = or i1 %tmp95, %tmp101 ; <i1> [#uses=1]
- br i1 %bothcond229, label %bb104, label %bb106
-
-bb104: ; preds = %bb85
- unwind
-
-bb106: ; preds = %bb85
- %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0 ; <float*> [#uses=2]
- store float %tmp9293, float* %tmp111
- %tmp123 = load float* %tmp90 ; <float> [#uses=4]
- %tmp125 = fcmp ult float %tmp9293, %tmp123 ; <i1> [#uses=1]
- br i1 %tmp125, label %cond_next147, label %cond_true128
-
-cond_true128: ; preds = %bb106
- %tmp133 = fcmp olt float %tmp123, %tmp2021 ; <i1> [#uses=1]
- %tmp142 = fcmp ogt float %tmp9293, %tmp3536 ; <i1> [#uses=1]
- %bothcond230 = or i1 %tmp133, %tmp142 ; <i1> [#uses=1]
- br i1 %bothcond230, label %bb145, label %cond_next147
-
-bb145: ; preds = %cond_true128
- unwind
-
-cond_next147: ; preds = %cond_true128, %bb106
- %tmp157 = fcmp ugt float %tmp123, -3.000000e+01 ; <i1> [#uses=1]
- %tmp165 = fcmp ult float %tmp9293, -3.000000e+01 ; <i1> [#uses=1]
- %bothcond231 = or i1 %tmp157, %tmp165 ; <i1> [#uses=1]
- br i1 %bothcond231, label %bb168, label %bb169
-
-bb168: ; preds = %cond_next147
- unwind
-
-bb169: ; preds = %cond_next147
- %tmp176 = fcmp ugt float %tmp123, 3.000000e+01 ; <i1> [#uses=1]
- %tmp184 = fcmp ult float %tmp9293, 3.000000e+01 ; <i1> [#uses=1]
- %bothcond232 = or i1 %tmp176, %tmp184 ; <i1> [#uses=1]
- br i1 %bothcond232, label %bb187, label %bb188
-
-bb187: ; preds = %bb169
- unwind
-
-bb188: ; preds = %bb169
- %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 ) ; <float> [#uses=2]
- %tmp194 = load float* %tmp90 ; <float> [#uses=1]
- %tmp196 = fcmp ugt float %tmp194, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp196, label %bb207, label %cond_next200
-
-cond_next200: ; preds = %bb188
- %tmp202 = load float* %tmp111 ; <float> [#uses=1]
- %tmp204 = fcmp ult float %tmp202, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp204, label %bb207, label %bb208
-
-bb207: ; preds = %cond_next200, %bb188
- unwind
-
-bb208: ; preds = %cond_next200
- %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 ) ; <float> [#uses=1]
- %tmp214 = fcmp oge float %tmp212, %tmp192 ; <i1> [#uses=1]
- %tmp217 = fcmp oge float %tmp192, 1.000000e+02 ; <i1> [#uses=1]
- %tmp221 = or i1 %tmp214, %tmp217 ; <i1> [#uses=1]
- br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock
-
-cond_true224: ; preds = %bb208
- call void @abort( ) noreturn
- ret void
-
-UnifiedReturnBlock: ; preds = %bb208
- ret void
-}
-
-declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x)
-
-declare i32 @report__ident_int(i32 %x)
-
-declare void @abort() noreturn
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
deleted file mode 100644
index b936686..0000000
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=x86 | grep nop
-target triple = "i686-apple-darwin8"
-
-
-define void @bork() noreturn nounwind {
-entry:
- unreachable
-}
diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
new file mode 100644
index 0000000..99cb856
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
+
+@.str = internal constant [4 x i8] c"%d\0A\00"
+
+declare i32 @printf(i8* noalias , ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+ br label %forbody
+
+forbody:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %forbody ] ; <i32>[#uses=3]
+ %sub14 = sub i32 1027, %i.0 ; <i32> [#uses=1]
+ %mul15 = mul i32 %sub14, 10 ; <i32> [#uses=1]
+ %add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
+ call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
+ %inc = add i32 %i.0, 1 ; <i32> [#uses=3]
+ %cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
+ br i1 %cmp, label %forbody, label %afterfor
+
+afterfor: ; preds = %forcond
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
new file mode 100644
index 0000000..36cc535
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s
+; This used to crash.
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @parse_number(i8* nocapture %p) nounwind {
+entry:
+ %shift.0 = select i1 false, i32 4, i32 2 ; <i32> [#uses=1]
+ br label %bb47
+
+bb47: ; preds = %bb47, %entry
+ br i1 false, label %bb54, label %bb47
+
+bb54: ; preds = %bb47
+ br i1 false, label %bb56, label %bb66
+
+bb56: ; preds = %bb62, %bb54
+ %p_addr.0.pn.rec = phi i64 [ %p_addr.6.rec, %bb62 ], [ 0, %bb54 ] ; <i64> [#uses=2]
+ %ch.6.in.in = phi i8* [ %p_addr.6, %bb62 ], [ null, %bb54 ] ; <i8*> [#uses=0]
+ %indvar202 = trunc i64 %p_addr.0.pn.rec to i32 ; <i32>[#uses=1]
+ %frac_bits.0 = mul i32 %indvar202, %shift.0 ; <i32>[#uses=1]
+ %p_addr.6.rec = add i64 %p_addr.0.pn.rec, 1 ; <i64>[#uses=2]
+ %p_addr.6 = getelementptr i8* null, i64 %p_addr.6.rec ; <i8*>[#uses=1]
+ br i1 false, label %bb66, label %bb62
+
+bb62: ; preds = %bb56
+ br label %bb56
+
+bb66: ; preds = %bb56, %bb54
+ %frac_bits.1 = phi i32 [ 0, %bb54 ], [ %frac_bits.0, %bb56 ] ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 4a97ac3..bb01e5a 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index 8d42627..2853930 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
; Check the register copy comes after the call to f and before the call to g
; PR3784
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index da493d4..b13d33e 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s
; Check that register copies in the landing pad come after the EH_LABEL
declare i32 @f()
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index b5873ba..90dabb8 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep {%rsp} %t
; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
new file mode 100644
index 0000000..85ee091
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will
+; generate DWARF and this test will need to be modified or removed.
+
+
+%struct.Pt = type { double, double }
+%struct.Rect = type { %struct.Pt, %struct.Pt }
+
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
+entry:
+;CHECK: DEBUG_VALUE
+ %retval = alloca double ; <double*> [#uses=2]
+ %0 = alloca double ; <double*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
+ %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
+ %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
+ %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %3, double* %0, align 8, !dbg !16
+ %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %4, double* %retval, align 8, !dbg !16
+ br label %return, !dbg !16
+
+return: ; preds = %entry
+ %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1]
+ ret double %retval1, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !7}
+!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!15 = metadata !{i32 11, i32 0, metadata !1, null}
+!16 = metadata !{i32 12, i32 0, metadata !17, null}
+!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
new file mode 100644
index 0000000..2113263
--- /dev/null
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O1 < %s
+; ModuleID = 'pr6157.bc'
+; formerly crashed in SelectionDAGBuilder
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
new file mode 100644
index 0000000..d211549
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -0,0 +1,239 @@
+; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
+; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+
+%0 = type { double }
+
+define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
+entry:
+ tail call void @llvm.dbg.value(metadata !{float %a}, i64 0, metadata !0)
+ tail call void @llvm.dbg.value(metadata !{float %b}, i64 0, metadata !11)
+ tail call void @llvm.dbg.value(metadata !{float %c}, i64 0, metadata !12)
+ tail call void @llvm.dbg.value(metadata !{float %d}, i64 0, metadata !13)
+ %0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1]
+ %1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1]
+ %2 = fcmp olt float %0, %1, !dbg !19 ; <i1> [#uses=1]
+ br i1 %2, label %bb, label %bb1, !dbg !19
+
+bb: ; preds = %entry
+ %3 = fdiv float %c, %d, !dbg !20 ; <float> [#uses=3]
+ tail call void @llvm.dbg.value(metadata !{float %3}, i64 0, metadata !16), !dbg !20
+ %4 = fmul float %3, %c, !dbg !21 ; <float> [#uses=1]
+ %5 = fadd float %4, %d, !dbg !21 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %5}, i64 0, metadata !14), !dbg !21
+ %6 = fmul float %3, %a, !dbg !22 ; <float> [#uses=1]
+ %7 = fadd float %6, %b, !dbg !22 ; <float> [#uses=1]
+ %8 = fdiv float %7, %5, !dbg !22 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %8}, i64 0, metadata !17), !dbg !22
+ %9 = fmul float %3, %b, !dbg !23 ; <float> [#uses=1]
+ %10 = fsub float %9, %a, !dbg !23 ; <float> [#uses=1]
+ %11 = fdiv float %10, %5, !dbg !23 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %11}, i64 0, metadata !18), !dbg !23
+ br label %bb2, !dbg !23
+
+bb1: ; preds = %entry
+ %12 = fdiv float %d, %c, !dbg !24 ; <float> [#uses=3]
+ tail call void @llvm.dbg.value(metadata !{float %12}, i64 0, metadata !16), !dbg !24
+ %13 = fmul float %12, %d, !dbg !25 ; <float> [#uses=1]
+ %14 = fadd float %13, %c, !dbg !25 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %14}, i64 0, metadata !14), !dbg !25
+ %15 = fmul float %12, %b, !dbg !26 ; <float> [#uses=1]
+ %16 = fadd float %15, %a, !dbg !26 ; <float> [#uses=1]
+ %17 = fdiv float %16, %14, !dbg !26 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %17}, i64 0, metadata !17), !dbg !26
+ %18 = fmul float %12, %a, !dbg !27 ; <float> [#uses=1]
+ %19 = fsub float %b, %18, !dbg !27 ; <float> [#uses=1]
+ %20 = fdiv float %19, %14, !dbg !27 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %20}, i64 0, metadata !18), !dbg !27
+ br label %bb2, !dbg !27
+
+bb2: ; preds = %bb1, %bb
+ %y.0 = phi float [ %11, %bb ], [ %20, %bb1 ] ; <float> [#uses=5]
+ %x.0 = phi float [ %8, %bb ], [ %17, %bb1 ] ; <float> [#uses=5]
+ %21 = fcmp uno float %x.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1]
+ %22 = fcmp uno float %y.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1]
+ %or.cond = and i1 %21, %22 ; <i1> [#uses=1]
+ br i1 %or.cond, label %bb4, label %bb46, !dbg !28
+
+bb4: ; preds = %bb2
+ %23 = fcmp une float %c, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+ %24 = fcmp une float %d, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+ %or.cond93 = or i1 %23, %24 ; <i1> [#uses=1]
+ br i1 %or.cond93, label %bb9, label %bb6, !dbg !29
+
+bb6: ; preds = %bb4
+ %25 = fcmp uno float %a, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+ %26 = fcmp uno float %b, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+ %or.cond94 = and i1 %25, %26 ; <i1> [#uses=1]
+ br i1 %or.cond94, label %bb9, label %bb8, !dbg !29
+
+bb8: ; preds = %bb6
+ %27 = tail call float @copysignf(float 0x7FF0000000000000, float %c) nounwind readnone, !dbg !30 ; <float> [#uses=2]
+ %28 = fmul float %27, %a, !dbg !30 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %28}, i64 0, metadata !17), !dbg !30
+ %29 = fmul float %27, %b, !dbg !31 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %29}, i64 0, metadata !18), !dbg !31
+ br label %bb46, !dbg !31
+
+bb9: ; preds = %bb6, %bb4
+ %30 = fcmp ord float %a, 0.000000e+00 ; <i1> [#uses=1]
+ %31 = fsub float %a, %a, !dbg !32 ; <float> [#uses=3]
+ %32 = fcmp uno float %31, 0.000000e+00 ; <i1> [#uses=1]
+ %33 = and i1 %30, %32, !dbg !32 ; <i1> [#uses=2]
+ br i1 %33, label %bb14, label %bb11, !dbg !32
+
+bb11: ; preds = %bb9
+ %34 = fcmp ord float %b, 0.000000e+00 ; <i1> [#uses=1]
+ %35 = fsub float %b, %b, !dbg !32 ; <float> [#uses=1]
+ %36 = fcmp uno float %35, 0.000000e+00 ; <i1> [#uses=1]
+ %37 = and i1 %34, %36, !dbg !32 ; <i1> [#uses=1]
+ br i1 %37, label %bb14, label %bb27, !dbg !32
+
+bb14: ; preds = %bb11, %bb9
+ %38 = fsub float %c, %c, !dbg !32 ; <float> [#uses=1]
+ %39 = fcmp ord float %38, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %39, label %bb15, label %bb27, !dbg !32
+
+bb15: ; preds = %bb14
+ %40 = fsub float %d, %d, !dbg !32 ; <float> [#uses=1]
+ %41 = fcmp ord float %40, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %41, label %bb16, label %bb27, !dbg !32
+
+bb16: ; preds = %bb15
+ %iftmp.0.0 = select i1 %33, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+ %42 = tail call float @copysignf(float %iftmp.0.0, float %a) nounwind readnone, !dbg !33 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %42}, i64 0, metadata !0), !dbg !33
+ %43 = fcmp ord float %b, 0.000000e+00 ; <i1> [#uses=1]
+ %44 = fsub float %b, %b, !dbg !34 ; <float> [#uses=1]
+ %45 = fcmp uno float %44, 0.000000e+00 ; <i1> [#uses=1]
+ %46 = and i1 %43, %45, !dbg !34 ; <i1> [#uses=1]
+ %iftmp.1.0 = select i1 %46, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+ %47 = tail call float @copysignf(float %iftmp.1.0, float %b) nounwind readnone, !dbg !34 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %47}, i64 0, metadata !11), !dbg !34
+ %48 = fmul float %42, %c, !dbg !35 ; <float> [#uses=1]
+ %49 = fmul float %47, %d, !dbg !35 ; <float> [#uses=1]
+ %50 = fadd float %48, %49, !dbg !35 ; <float> [#uses=1]
+ %51 = fmul float %50, 0x7FF0000000000000, !dbg !35 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %51}, i64 0, metadata !17), !dbg !35
+ %52 = fmul float %47, %c, !dbg !36 ; <float> [#uses=1]
+ %53 = fmul float %42, %d, !dbg !36 ; <float> [#uses=1]
+ %54 = fsub float %52, %53, !dbg !36 ; <float> [#uses=1]
+ %55 = fmul float %54, 0x7FF0000000000000, !dbg !36 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %55}, i64 0, metadata !18), !dbg !36
+ br label %bb46, !dbg !36
+
+bb27: ; preds = %bb15, %bb14, %bb11
+ %56 = fcmp ord float %c, 0.000000e+00 ; <i1> [#uses=1]
+ %57 = fsub float %c, %c, !dbg !37 ; <float> [#uses=1]
+ %58 = fcmp uno float %57, 0.000000e+00 ; <i1> [#uses=1]
+ %59 = and i1 %56, %58, !dbg !37 ; <i1> [#uses=2]
+ br i1 %59, label %bb33, label %bb30, !dbg !37
+
+bb30: ; preds = %bb27
+ %60 = fcmp ord float %d, 0.000000e+00 ; <i1> [#uses=1]
+ %61 = fsub float %d, %d, !dbg !37 ; <float> [#uses=1]
+ %62 = fcmp uno float %61, 0.000000e+00 ; <i1> [#uses=1]
+ %63 = and i1 %60, %62, !dbg !37 ; <i1> [#uses=1]
+ %64 = fcmp ord float %31, 0.000000e+00 ; <i1> [#uses=1]
+ %or.cond95 = and i1 %63, %64 ; <i1> [#uses=1]
+ br i1 %or.cond95, label %bb34, label %bb46, !dbg !37
+
+bb33: ; preds = %bb27
+ %.old = fcmp ord float %31, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %.old, label %bb34, label %bb46, !dbg !37
+
+bb34: ; preds = %bb33, %bb30
+ %65 = fsub float %b, %b, !dbg !37 ; <float> [#uses=1]
+ %66 = fcmp ord float %65, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %66, label %bb35, label %bb46, !dbg !37
+
+bb35: ; preds = %bb34
+ %iftmp.2.0 = select i1 %59, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+ %67 = tail call float @copysignf(float %iftmp.2.0, float %c) nounwind readnone, !dbg !38 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %67}, i64 0, metadata !12), !dbg !38
+ %68 = fcmp ord float %d, 0.000000e+00 ; <i1> [#uses=1]
+ %69 = fsub float %d, %d, !dbg !39 ; <float> [#uses=1]
+ %70 = fcmp uno float %69, 0.000000e+00 ; <i1> [#uses=1]
+ %71 = and i1 %68, %70, !dbg !39 ; <i1> [#uses=1]
+ %iftmp.3.0 = select i1 %71, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+ %72 = tail call float @copysignf(float %iftmp.3.0, float %d) nounwind readnone, !dbg !39 ; <float> [#uses=2]
+ tail call void @llvm.dbg.value(metadata !{float %72}, i64 0, metadata !13), !dbg !39
+ %73 = fmul float %67, %a, !dbg !40 ; <float> [#uses=1]
+ %74 = fmul float %72, %b, !dbg !40 ; <float> [#uses=1]
+ %75 = fadd float %73, %74, !dbg !40 ; <float> [#uses=1]
+ %76 = fmul float %75, 0.000000e+00, !dbg !40 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %76}, i64 0, metadata !17), !dbg !40
+ %77 = fmul float %67, %b, !dbg !41 ; <float> [#uses=1]
+ %78 = fmul float %72, %a, !dbg !41 ; <float> [#uses=1]
+ %79 = fsub float %77, %78, !dbg !41 ; <float> [#uses=1]
+ %80 = fmul float %79, 0.000000e+00, !dbg !41 ; <float> [#uses=1]
+ tail call void @llvm.dbg.value(metadata !{float %80}, i64 0, metadata !18), !dbg !41
+ br label %bb46, !dbg !41
+
+bb46: ; preds = %bb35, %bb34, %bb33, %bb30, %bb16, %bb8, %bb2
+ %y.1 = phi float [ %80, %bb35 ], [ %y.0, %bb34 ], [ %y.0, %bb33 ], [ %y.0, %bb30 ], [ %55, %bb16 ], [ %29, %bb8 ], [ %y.0, %bb2 ] ; <float> [#uses=2]
+ %x.1 = phi float [ %76, %bb35 ], [ %x.0, %bb34 ], [ %x.0, %bb33 ], [ %x.0, %bb30 ], [ %51, %bb16 ], [ %28, %bb8 ], [ %x.0, %bb2 ] ; <float> [#uses=1]
+ %81 = fmul float %y.1, 0.000000e+00, !dbg !42 ; <float> [#uses=1]
+ %82 = fadd float %y.1, 0.000000e+00, !dbg !42 ; <float> [#uses=1]
+ %tmpr = fadd float %x.1, %81, !dbg !42 ; <float> [#uses=1]
+ %tmp89 = bitcast float %tmpr to i32 ; <i32> [#uses=1]
+ %tmp90 = zext i32 %tmp89 to i64 ; <i64> [#uses=1]
+ %tmp85 = bitcast float %82 to i32 ; <i32> [#uses=1]
+ %tmp86 = zext i32 %tmp85 to i64 ; <i64> [#uses=1]
+ %tmp87 = shl i64 %tmp86, 32 ; <i64> [#uses=1]
+ %ins = or i64 %tmp90, %tmp87 ; <i64> [#uses=1]
+ %tmp84 = bitcast i64 %ins to double ; <double> [#uses=1]
+ %mrv75 = insertvalue %0 undef, double %tmp84, 0, !dbg !42 ; <%0> [#uses=1]
+ ret %0 %mrv75, !dbg !42
+}
+
+declare float @fabsf(float)
+
+declare float @copysignf(float, float) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.lv = !{!0, !11, !12, !13, !14, !16, !17, !18}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
+!6 = metadata !{i32 524310, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 524324, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524310, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 524324, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 524545, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 524545, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 524544, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 524299, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 524544, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 524544, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 524544, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 1929, i32 0, metadata !15, null}
+!20 = metadata !{i32 1931, i32 0, metadata !15, null}
+!21 = metadata !{i32 1932, i32 0, metadata !15, null}
+!22 = metadata !{i32 1933, i32 0, metadata !15, null}
+!23 = metadata !{i32 1934, i32 0, metadata !15, null}
+!24 = metadata !{i32 1938, i32 0, metadata !15, null}
+!25 = metadata !{i32 1939, i32 0, metadata !15, null}
+!26 = metadata !{i32 1940, i32 0, metadata !15, null}
+!27 = metadata !{i32 1941, i32 0, metadata !15, null}
+!28 = metadata !{i32 1946, i32 0, metadata !15, null}
+!29 = metadata !{i32 1948, i32 0, metadata !15, null}
+!30 = metadata !{i32 1950, i32 0, metadata !15, null}
+!31 = metadata !{i32 1951, i32 0, metadata !15, null}
+!32 = metadata !{i32 1953, i32 0, metadata !15, null}
+!33 = metadata !{i32 1955, i32 0, metadata !15, null}
+!34 = metadata !{i32 1956, i32 0, metadata !15, null}
+!35 = metadata !{i32 1957, i32 0, metadata !15, null}
+!36 = metadata !{i32 1958, i32 0, metadata !15, null}
+!37 = metadata !{i32 1960, i32 0, metadata !15, null}
+!38 = metadata !{i32 1962, i32 0, metadata !15, null}
+!39 = metadata !{i32 1963, i32 0, metadata !15, null}
+!40 = metadata !{i32 1964, i32 0, metadata !15, null}
+!41 = metadata !{i32 1965, i32 0, metadata !15, null}
+!42 = metadata !{i32 1969, i32 0, metadata !15, null}
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
new file mode 100644
index 0000000..80643d0
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; Test to check separate label for inlined function argument.
+
+define i32 @foo(i32 %y) nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !0)
+ %0 = tail call i32 (...)* @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !9
+}
+
+declare i32 @zoo(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define i32 @bar(i32 %x) nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !7)
+ tail call void @llvm.dbg.value(metadata !11, i64 0, metadata !0) nounwind
+ %0 = tail call i32 (...)* @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
+ %1 = add nsw i32 %0, %x, !dbg !13 ; <i32> [#uses=1]
+ ret i32 %1, !dbg !13
+}
+
+!llvm.dbg.lv = !{!0, !7}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"f.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"f.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !6}
+!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 3, i32 0, metadata !10, null}
+!10 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 1}
+!12 = metadata !{i32 3, i32 0, metadata !10, metadata !13}
+!13 = metadata !{i32 7, i32 0, metadata !14, null}
+!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
+
+;CHECK: DEBUG_VALUE: bar:x <- EBX+0
+;CHECK-NEXT:Ltmp
+;CHECK-NEXT: DEBUG_VALUE: foo:y <- 1+0
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
new file mode 100644
index 0000000..812d372
--- /dev/null
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -0,0 +1,53 @@
+; RUN: llc -O2 < %s | FileCheck %s
+; Test to check that unused argument 'this' is not undefined in debug info.
+
+target triple = "x86_64-apple-darwin10.2"
+%struct.foo = type { i32 }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.foo*, i32)* @_ZN3foo3bazEi to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
+;CHECK: DEBUG_VALUE: baz:this <- RDI+0
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.foo* %this}, i64 0, metadata !15)
+ tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !16)
+ %0 = mul nsw i32 %x, 7, !dbg !29 ; <i32> [#uses=1]
+ %1 = add nsw i32 %0, 1, !dbg !29 ; <i32> [#uses=1]
+ ret i32 %1, !dbg !29
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524307, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{i32 524329, metadata !"foo.cp", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{metadata !6, metadata !1, metadata !8}
+!6 = metadata !{i32 524301, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
+!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{metadata !7, metadata !11, metadata !7}
+!11 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 524326, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 524545, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 524545, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!20 = metadata !{metadata !7, metadata !7, metadata !21}
+!21 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 524324, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 524545, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 524544, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 524299, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 524299, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 524544, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 16, i32 0, metadata !30, null}
+!30 = metadata !{i32 524299, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
new file mode 100644
index 0000000..be7d94c
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mcpu=i486
+; PR7375
+;
+; This function contains a block (while.cond) with a lonely RFP use that is
+; not a kill. We still need an FP_REG_KILL for that block since the register
+; allocator will insert a reload.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 {
+entry:
+ %tmp2 = load double* %t ; <double> [#uses=1]
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br i1 undef, label %if.end, label %bb.nph
+
+while.cond: ; preds = %bb.nph, %while.cond
+ store double %tmp2, double* undef
+ br i1 undef, label %if.end, label %while.cond
+
+bb.nph: ; preds = %if.then
+ br label %while.cond
+
+if.end: ; preds = %while.cond, %if.then, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll
new file mode 100644
index 0000000..3ac4cf5
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-15-Crash.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null
+; PR7653
+
+@__FUNCTION__.1623 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+entry:
+ tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr
+inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr
+inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
new file mode 100644
index 0000000..96016cf
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define i32 @extend2bit_v2(i32 %val) {
+entry:
+ %0 = trunc i32 %val to i2 ; <i2> [#uses=1]
+ %1 = sext i2 %0 to i32 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1]
+ %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
+ ret i32 %3
+}
+
+; CHECK: extend2bit_v2:
+; CHECK: xorl %eax, %eax
+; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
new file mode 100644
index 0000000..1919d2e
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR7814
+
+@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1]
+@g_38 = global i32 0, align 4 ; <i32*> [#uses=2]
+@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %tmp = load i64* @g_16 ; <i64> [#uses=1]
+ %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1]
+ %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1]
+ %and = and i64 %conv, 150 ; <i64> [#uses=1]
+ %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1]
+ %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+; CHECK: andl $150
+; CHECK-NEXT: testb
+; CHECK-NEXT: jg
+
+entry.if.end_crit_edge: ; preds = %entry
+ %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1]
+ br label %if.end
+
+if.then: ; preds = %entry
+ store i32 1, i32* @g_38
+ br label %if.end
+
+if.end: ; preds = %entry.if.end_crit_edge, %if.then
+ %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1]
+ %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
new file mode 100644
index 0000000..98a0887
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32
+
+define void @func() nounwind {
+invoke.cont:
+ %call = tail call i8* @malloc()
+ %a = invoke i32 @bar()
+ to label %bb1 unwind label %lpad
+
+bb1:
+ ret void
+
+lpad:
+ %exn = tail call i8* @llvm.eh.exception() nounwind
+ %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind
+ %ehspec.fails = icmp slt i32 %eh.selector, 0
+ br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup
+
+cleanup:
+ tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind
+ unreachable
+
+ehspec.unexpected:
+ tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind
+ unreachable
+}
+
+declare noalias i8* @malloc()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @_Unwind_Resume_or_Rethrow(i8*)
+
+declare void @__cxa_call_unexpected(i8*)
+
+declare i32 @bar()
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
new file mode 100644
index 0000000..d98ef14
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; CHECK: DW_TAG_constant
+; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name
+
+define void @foo() nounwind ssp {
+entry:
+ call void @bar(i32 201), !dbg !8
+ ret void, !dbg !8
+}
+
+declare void @bar(i32)
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.gv = !{!5}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ]
+!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
+!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 3, i32 14, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
new file mode 100644
index 0000000..e5542ba
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef.
+
+define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp {
+ %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3]
+ %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1]
+; CHECK: movl (%r{{.*}}), %
+ %x4 = load i32* %h, align 4 ; <i32> [#uses=1]
+
+; The imull clobbers a 32-bit register.
+; CHECK: imull %{{...}}, %e[[CLOBBER:..]]
+ %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1]
+
+; So we cannot use the corresponding 64-bit register anymore.
+; CHECK-NOT: shrq $32, %r[[CLOBBER]]
+ %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1]
+ %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]
+
+; CHECK: idiv
+ %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1]
+ store i32 %x6, i32* %w, align 4
+ ret void
+}
+
+declare i64 @g(i8*, i8*)
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
index f200589..629a147 100644
--- a/test/CodeGen/X86/GC/dg.exp
+++ b/test/CodeGen/X86/GC/dg.exp
@@ -1,3 +1,5 @@
load_lib llvm.exp
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
new file mode 100644
index 0000000..728e377
--- /dev/null
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink}
+
+define fastcc void @t() nounwind ssp {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ br i1 undef, label %return, label %bb3
+
+bb3: ; preds = %bb
+ unreachable
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb.nph, label %return
+
+bb.nph: ; preds = %bb4
+ br label %bb5
+
+bb5: ; preds = %bb9, %bb.nph
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1]
+ %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0]
+ br i1 undef, label %bb9, label %bb6
+
+bb6: ; preds = %bb5
+ br i1 undef, label %bb9, label %bb7
+
+bb7: ; preds = %bb6
+ br i1 undef, label %bb9, label %bb8
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7, %bb6, %bb5
+ br i1 undef, label %bb5, label %return
+
+return: ; preds = %bb9, %bb4, %bb
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
new file mode 100644
index 0000000..a72160b
--- /dev/null
+++ b/test/CodeGen/X86/avx-128.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+@z = common global <4 x float> zeroinitializer, align 16
+
+define void @zero() nounwind ssp {
+entry:
+ ; CHECK: vpxor
+ ; CHECK: vmovaps
+ store <4 x float> zeroinitializer, <4 x float>* @z, align 16
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll
new file mode 100644
index 0000000..20d31e7
--- /dev/null
+++ b/test/CodeGen/X86/avx-256.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+@x = common global <8 x float> zeroinitializer, align 32
+@y = common global <4 x double> zeroinitializer, align 32
+
+define void @zero() nounwind ssp {
+entry:
+ ; CHECK: vxorps
+ ; CHECK: vmovaps
+ ; CHECK: vmovaps
+ store <8 x float> zeroinitializer, <8 x float>* @x, align 32
+ store <4 x double> zeroinitializer, <4 x double>* @y, align 32
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
new file mode 100644
index 0000000..9de9023
--- /dev/null
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -0,0 +1,2587 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesdec
+ %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesdeclast
+ %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesenc
+ %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesenclast
+ %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
+ ; CHECK: vaesimc
+ %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
+ ; CHECK: vaeskeygenassist
+ %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vaddsd
+ %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcmpordpd
+ %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcmpordsd
+ %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2pd
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2ps
+ %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
+ ; CHECK: vcvtpd2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
+ ; CHECK: vcvtpd2ps
+ %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
+ ; CHECK: vcvtps2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+ ; CHECK: vcvtps2pd
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
+ ; CHECK: vcvtsd2si
+ %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
+ ; CHECK: vcvtsd2ss
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
+ ; CHECK: movl
+ ; CHECK: vcvtsi2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
+ ; CHECK: vcvtss2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
+ ; CHECK: vcvttpd2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
+ ; CHECK: vcvttps2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
+  ; CHECK: vcvttsd2si
+ %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vdivsd
+ %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovdqu
+ %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovupd
+ %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
+
+
+define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
+ ; CHECK: pushl
+ ; CHECK: movl
+ ; CHECK: vmaskmovdqu
+ ; CHECK: popl
+ call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
+
+
+define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmaxpd
+ %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmaxsd
+ %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vminpd
+ %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vminsd
+ %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
+ ; CHECK: vmovmskpd
+ %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
+
+
+define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntdq
+ call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
+
+
+define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntpd
+ call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
+
+
+define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmulsd
+ %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpackssdw
+ %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpacksswb
+ %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpackuswb
+ %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpaddsb
+ %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpaddsw
+ %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpaddusb
+ %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpaddusw
+ %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpavgb
+ %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpavgw
+ %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpeqb
+ %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcmpeqd
+ %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcmpeqw
+ %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpgtb
+ %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcmpgtd
+ %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcmpgtw
+ %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaddwd
+ %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaxsw
+ %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpmaxub
+ %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpminsw
+ %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpminub
+ %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+ ; CHECK: vpmovmskb
+ %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhw
+ %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhuw
+ %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmuludq
+ %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsadbw
+ %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpslld
+ %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsllq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsllw
+ %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
+ ; CHECK: vpslld
+ %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
+ ; CHECK: vpsllq
+ %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
+ ; CHECK: vpsllw
+ %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrad
+ %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsraw
+ %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
+ ; CHECK: vpsrad
+ %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
+ ; CHECK: vpsraw
+ %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrld
+ %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsrlq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsrlw
+ %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
+ ; CHECK: vpsrld
+ %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
+ ; CHECK: vpsrlq
+ %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
+ ; CHECK: vpsrlw
+ %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsubsb
+ %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsubsw
+ %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsubusb
+ %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsubusw
+ %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
+ ; CHECK: vsqrtpd
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
+ ; CHECK: vsqrtsd
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+
+define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovq
+ call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
+
+
+define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovdqu
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+
+define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovupd
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+
+define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vsubsd
+ %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vaddsubpd
+ %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vaddsubps
+ %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vhaddpd
+ %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vhaddps
+ %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vhsubpd
+ %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vhsubps
+ %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vlddqu
+ %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vblendvpd
+ %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vblendvps
+ %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vdppd
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vinsertps
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovntdqa
+ %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
+
+
+define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vmpsadbw
+ %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpackusdw
+ %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ; CHECK: vpblendvb
+ %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpblendw
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcmpeqq
+ %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
+ ; CHECK: vphminposuw
+ %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpmaxsb
+ %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmaxsd
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmaxud
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaxuw
+ %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpminsb
+ %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpminsd
+ %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpminud
+ %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpminuw
+ %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbw
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovsxdq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbw
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovzxdq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmuldq
+ %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
+ ; CHECK: vroundpd
+ %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
+ ; CHECK: vroundps
+ %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vroundsd
+ %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vroundss
+ %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestrm
+ %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcmpgtq
+ %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistrm
+ %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vaddss
+ %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcmpordps
+ %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcmpordss
+ %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
+ ; CHECK: movl
+ ; CHECK: vcvtsi2ss
+ %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
+
+
+define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
+ ; CHECK: vcvtss2si
+ %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vdivss
+ %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_ldmxcsr(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vldmxcsr
+ call void @llvm.x86.sse.ldmxcsr(i8* %a0)
+ ret void
+}
+declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
+
+
+define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovups
+ %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
+
+
+define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmaxps
+ %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmaxss
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vminps
+ %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vminss
+ %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
+ ; CHECK: vmovmskps
+ %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntps
+ call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
+
+
+define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmulss
+ %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
+ ; CHECK: vrcpps
+ %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
+ ; CHECK: vrcpss
+ %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
+ ; CHECK: vrsqrtps
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
+ ; CHECK: vrsqrtss
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
+ ; CHECK: vsqrtps
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
+ ; CHECK: vsqrtss
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_stmxcsr(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vstmxcsr
+ call void @llvm.x86.sse.stmxcsr(i8* %a0)
+ ret void
+}
+declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
+
+
+define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovups
+ call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+
+define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vsubss
+ %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
+ ; CHECK: vpabsb
+ %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
+ ; CHECK: vpabsd
+ %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
+ ; CHECK: vpabsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphaddd
+ %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphaddsw
+ %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphaddw
+ %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphsubd
+ %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphsubsw
+ %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphsubw
+ %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaddubsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhrsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshufb
+ %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsignb
+ %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsignd
+ %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsignw
+ %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vaddsubpd
+ %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vaddsubps
+ %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vblendvpd
+ %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vblendvps
+ %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vcmpordpd
+ %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vcmpordps
+ %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
+ ; CHECK: vcvtpd2psy
+ %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
+ ; CHECK: vcvtpd2dqy
+ %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
+ ; CHECK: vcvtps2pd
+ %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
+ ; CHECK: vcvtps2dq
+ %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2pd
+ %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
+ ; CHECK: vcvtdq2ps
+ %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
+ ; CHECK: vcvttpd2dqy
+ %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
+ ; CHECK: vcvttps2dq
+ %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vhaddpd
+ %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vhaddps
+ %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vhsubpd
+ %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vhsubps
+ %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
+ ; CHECK: vlddqu
+ %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
+ ; CHECK: vmovdqu
+ %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
+ ; CHECK: vmovupd
+ %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
+ ; CHECK: vmovups
+ %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: vmaskmovpd
+ %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmaskmovpd
+ %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: vmaskmovps
+ %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmaskmovps
+ %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+
+
+define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vmaskmovpd
+ call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
+
+
+define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vmaskmovpd
+ call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
+
+
+define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vmaskmovps
+ call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
+
+
+define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vmaskmovps
+ call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+
+
+define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vmaxpd
+ %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vmaxps
+ %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vminpd
+ %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vminps
+ %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+ ; CHECK: vmovmskpd
+ %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+ ; CHECK: vmovmskps
+ %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
+ ; CHECK: vmovntdq
+ call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
+
+
+define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmovntpd
+ call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
+
+
+define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmovntps
+ call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
+
+
+define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
+ ; CHECK: vrcpps
+ %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
+ ; CHECK: vroundpd
+ %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
+ ; CHECK: vroundps
+ %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
+ ; CHECK: vrsqrtps
+ %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
+ ; CHECK: vsqrtpd
+ %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
+ ; CHECK: vsqrtps
+ %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; CHECK: vmovdqu
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+
+define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmovupd
+ call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
+
+
+define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmovups
+ call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
+
+
+define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
+ ; CHECK: vbroadcastsd
+ %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
+ ; CHECK: vbroadcastf128
+ %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
+ ; CHECK: vbroadcastf128
+ %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
+
+
+define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
+ ; CHECK: vextractf128
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
+ ; CHECK: vextractf128
+ %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
+ ; CHECK: vextractf128
+ %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
+ ; CHECK: vpermilpd
+ %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
+ ; CHECK: vpermilpd
+ %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
+ ; CHECK: vpermilps
+ %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
+ ; CHECK: vpermilps
+ %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
+ ; CHECK: vpermilpd
+ %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+ ; CHECK: vpermilpd
+ %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
+ ; CHECK: vpermilps
+ %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+ ; CHECK: vpermilps
+ %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_vzeroall() {
+ ; CHECK: vzeroall
+ call void @llvm.x86.avx.vzeroall()
+ ret void
+}
+declare void @llvm.x86.avx.vzeroall() nounwind
+
+
+define void @test_x86_avx_vzeroupper() {
+ ; CHECK: vzeroupper
+ call void @llvm.x86.avx.vzeroupper()
+ ret void
+}
+declare void @llvm.x86.avx.vzeroupper() nounwind
+
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
new file mode 100644
index 0000000..b186710
--- /dev/null
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
+ ; CHECK: vcvtsd2si
+ %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
+ ; CHECK: vcvtsi2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
+
+
+define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+
+define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
+ ; CHECK: vcvtss2si
+ %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
+ ; CHECK: vcvtsi2ss
+ %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
+
+
+define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+
diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll
new file mode 100644
index 0000000..6190c36
--- /dev/null
+++ b/test/CodeGen/X86/barrier-sse.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+ ret void
+}
diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll
new file mode 100644
index 0000000..fad6ef6
--- /dev/null
+++ b/test/CodeGen/X86/barrier.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 255adfb..3857fb1 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -5,7 +5,7 @@
; Call to immediate is not safe on x86-64 unless we *know* that the
; call will be within 32-bits pcrel from the dest immediate.
-; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
+; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax}
; PR3666
; PR3773
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
new file mode 100644
index 0000000..1f7f6ec
--- /dev/null
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -o - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9"
+
+; The comparison happens before the relevant use, but it can still be rewritten
+; to compare with zero.
+
+; CHECK: foo:
+; CHECK: align
+; CHECK: incl %eax
+; CHECK-NEXT: decl %ecx
+; CHECK-NEXT: jne
+
+define void @foo() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; <i32> [#uses=1]
+ %i.2.0.us1534 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; <i1> [#uses=2]
+ %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=0]
+ %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; <i32> [#uses=1]
+ %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=0]
+ br i1 %tmp611.us1535, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
new file mode 100644
index 0000000..cb63809
--- /dev/null
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
@@ -0,0 +1,28 @@
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$10}
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9"
+
+; The comparison happens after the relevant use, so the stride can easily
+; be changed. The comparison can be done in a narrower mode than the
+; induction variable.
+; TODO: By making the first store post-increment as well, the loop setup
+; could be made simpler.
+
+define void @foo() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; <i32> [#uses=1]
+ %i.2.0.us1534 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; <i32> [#uses=1]
+ %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=1]
+ store i64 %tmp645646647.us1547, i64* null
+ %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; <i1> [#uses=2]
+ %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=1]
+ store i32 %tmp623.us1538, i32* null
+ br i1 %tmp611.us1535, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-2.ll b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
new file mode 100644
index 0000000..ae27383
--- /dev/null
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s
+; PR4222
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-pc-linux-gnu"
+module asm ".ident\09\22$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 184499 2008-10-31 10:11:35Z kib $\22"
+ %struct.dumperinfo = type <{ i32 (i8*, i8*, i64, i64, i64)*, i8*, i32, i32, i64, i64 }>
+
+define void @minidumpsys(%struct.dumperinfo* %di) nounwind {
+entry:
+ br label %if.end
+
+if.end: ; preds = %if.end52, %entry
+ br label %for.cond.i.preheader
+
+for.cond.i.preheader: ; preds = %if.end52, %if.end
+ %indvar688 = phi i64 [ 0, %if.end ], [ %indvar.next689, %if.end52 ] ; <i64> [#uses=3]
+ %tmp690 = shl i64 %indvar688, 12 ; <i64> [#uses=1]
+ %pa.0642 = add i64 %tmp690, 0 ; <i64> [#uses=1]
+ %indvar688703 = trunc i64 %indvar688 to i32 ; <i32> [#uses=1]
+ %tmp692693 = add i32 %indvar688703, 1 ; <i32> [#uses=1]
+ %phitmp = sext i32 %tmp692693 to i64 ; <i64> [#uses=1]
+ br i1 false, label %if.end52, label %land.lhs.true.i
+
+land.lhs.true.i: ; preds = %for.cond.i.preheader
+ %shr2.i = lshr i64 %pa.0642, 18 ; <i64> [#uses=0]
+ unreachable
+
+if.end52: ; preds = %for.cond.i.preheader
+ %phitmp654 = icmp ult i64 %phitmp, 512 ; <i1> [#uses=1]
+ %indvar.next689 = add i64 %indvar688, 1 ; <i64> [#uses=1]
+ br i1 %phitmp654, label %for.cond.i.preheader, label %if.end
+}
+
+define void @promote(%struct.dumperinfo* %di) nounwind {
+entry:
+ br label %if.end
+
+if.end: ; preds = %if.end52, %entry
+ br label %for.cond.i.preheader
+
+for.cond.i.preheader: ; preds = %if.end52, %if.end
+ %indvar688 = phi i32 [ 0, %if.end ], [ %indvar.next689, %if.end52 ] ; <i64> [#uses=3]
+ %tmp690 = shl i32 %indvar688, 12 ; <i64> [#uses=1]
+ %pa.0642 = add i32 %tmp690, 0 ; <i64> [#uses=1]
+ %tmp692693 = add i32 %indvar688, 1 ; <i32> [#uses=1]
+ %phitmp = sext i32 %tmp692693 to i64 ; <i64> [#uses=1]
+ br i1 false, label %if.end52, label %land.lhs.true.i
+
+land.lhs.true.i: ; preds = %for.cond.i.preheader
+ %shr2.i = lshr i32 %pa.0642, 18 ; <i64> [#uses=0]
+ unreachable
+
+if.end52: ; preds = %for.cond.i.preheader
+ %phitmp654 = icmp ult i64 %phitmp, 512 ; <i1> [#uses=1]
+ %indvar.next689 = add i32 %indvar688, 1 ; <i64> [#uses=1]
+ br i1 %phitmp654, label %for.cond.i.preheader, label %if.end
+}
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 05388f9..2a44463 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
-; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
declare float @qux(float %y)
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
index f29cbf3..96fef0f 100644
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29
+; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
%CC = type { %Register }
%II = type { %"struct.XX::II::$_74" }
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index bdbaac0..bf57e78 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | FileCheck %s
; PR2936
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
define dllexport x86_fastcallcc i32 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 1df0920..e577611 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
-; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
-; RUN: llc < %s -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16}
define void @t() nounwind {
A:
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
new file mode 100644
index 0000000..b303cd1
--- /dev/null
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+define void @func() {
+entry:
+ unreachable
+}
+; CHECK-NO-FP: _func:
+; CHECK-NO-FP-NOT: movq %rsp, %rbp
+; CHECK-NO-FP: nop
+
+; CHECK-FP: _func:
+; CHECK-FP: movq %rsp, %rbp
+; CHECK-FP-NEXT: Ltmp1:
+; CHECK-FP: nop
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 23b45eb..9ded7e0 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,7 +1,7 @@
; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
; RUN: count 2
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
; RUN: grep fabs\$ | count 3
declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-isel-atomic.ll b/test/CodeGen/X86/fast-isel-atomic.ll
new file mode 100644
index 0000000..74c5868
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-atomic.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -march=x86-64
+; rdar://8204072
+; PR7652
+
+@sc = external global i8
+@uc = external global i8
+
+declare i8 @llvm.atomic.load.and.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @test_fetch_and_op() nounwind {
+entry:
+ %tmp40 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1]
+ store i8 %tmp40, i8* @sc
+ %tmp41 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1]
+ store i8 %tmp41, i8* @uc
+ ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
new file mode 100644
index 0000000..4ab1bc6
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s
+; rdar://8337108
+
+; Fast-isel shouldn't try to look through the compare because it's in a
+; different basic block, so its operands aren't necessarily exported
+; for cross-block usage.
+
+; CHECK: movb %al, 7(%rsp)
+; CHECK: callq {{_?}}bar
+; CHECK: movb 7(%rsp), %al
+
+declare void @bar()
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %q = add i32 %a, 7
+ %r = add i32 %b, 9
+ %t = icmp ult i32 %q, %r
+ invoke void @bar() to label %next unwind label %unw
+next:
+ br i1 %t, label %true, label %return
+true:
+ call void @bar()
+ br label %return
+return:
+ ret void
+unw:
+ unreachable
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 1270ab7..577dd72 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind {
%t15 = load i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32: test1:
-; X32: movl (%ecx,%eax,4), %eax
+; X32: movl (%eax,%ecx,4), %eax
; X32: ret
; X64: test1:
@@ -23,7 +23,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
%t15 = load i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32: test2:
-; X32: movl (%eax,%ecx,4), %eax
+; X32: movl (%edx,%ecx,4), %eax
; X32: ret
; X64: test2:
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
index 7759bb0..5c62c18 100644
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax}
+; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %e}
; PR3242
define void @foo(i32 %x, i32* %p) nounwind {
diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll
new file mode 100644
index 0000000..ffcbf8a
--- /dev/null
+++ b/test/CodeGen/X86/force-align-stack.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s
+; Tests to make sure that we always align the stack out to the minimum needed -
+; in this case 16-bytes.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.3"
+
+define void @a() nounwind ssp {
+entry:
+; CHECK: _a:
+; CHECK: andl $-16, %esp
+ %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16
+ call void @b(<16 x i8>* %z) nounwind
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare void @b(<16 x i8>*)
diff --git a/test/CodeGen/X86/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll
new file mode 100644
index 0000000..1a695f3
--- /dev/null
+++ b/test/CodeGen/X86/insert-positions.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86-64 >/dev/null
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @test0() nounwind {
+if.end90.i.i:
+ br label %while.body.i.i221.i
+
+while.body.i.i221.i: ; preds = %while.cond.backedge.i.i.i, %if.end90.i.i
+ br i1 undef, label %if.then.i.i224.i, label %while.cond.backedge.i.i.i
+
+while.cond.backedge.i.i.i: ; preds = %for.end.i.i.i, %while.body.i.i221.i
+ br label %while.body.i.i221.i
+
+if.then.i.i224.i: ; preds = %while.body.i.i221.i
+ switch i32 undef, label %for.cond.i.i226.i [
+ i32 92, label %sw.bb.i.i225.i
+ i32 34, label %sw.bb.i.i225.i
+ i32 110, label %sw.bb21.i.i.i
+ ]
+
+sw.bb.i.i225.i: ; preds = %if.then.i.i224.i, %if.then.i.i224.i
+ unreachable
+
+sw.bb21.i.i.i: ; preds = %if.then.i.i224.i
+ unreachable
+
+for.cond.i.i226.i: ; preds = %for.body.i.i.i, %if.then.i.i224.i
+ %0 = phi i64 [ %tmp154.i.i.i, %for.body.i.i.i ], [ 0, %if.then.i.i224.i ] ; <i64> [#uses=2]
+ %tmp154.i.i.i = add i64 %0, 1 ; <i64> [#uses=2]
+ %i.0.i.i.i = trunc i64 %0 to i32 ; <i32> [#uses=1]
+ br i1 undef, label %land.rhs.i.i.i, label %for.end.i.i.i
+
+land.rhs.i.i.i: ; preds = %for.cond.i.i226.i
+ br i1 undef, label %for.body.i.i.i, label %for.end.i.i.i
+
+for.body.i.i.i: ; preds = %land.rhs.i.i.i
+ br label %for.cond.i.i226.i
+
+for.end.i.i.i: ; preds = %land.rhs.i.i.i, %for.cond.i.i226.i
+ %idx.ext.i.i.i = sext i32 %i.0.i.i.i to i64 ; <i64> [#uses=1]
+ %sub.ptr72.sum.i.i.i = xor i64 %idx.ext.i.i.i, -1 ; <i64> [#uses=1]
+ %pos.addr.1.sum155.i.i.i = add i64 %tmp154.i.i.i, %sub.ptr72.sum.i.i.i ; <i64> [#uses=1]
+ %arrayidx76.i.i.i = getelementptr inbounds i8* undef, i64 %pos.addr.1.sum155.i.i.i ; <i8*> [#uses=0]
+ br label %while.cond.backedge.i.i.i
+}
+
+define void @test1() nounwind {
+entry:
+ %t = shl i32 undef, undef ; <i32> [#uses=1]
+ %t9 = sub nsw i32 0, %t ; <i32> [#uses=1]
+ br label %outer
+
+outer: ; preds = %bb18, %bb
+ %i12 = phi i32 [ %t21, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2]
+ %i13 = phi i32 [ %t20, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2]
+ br label %inner
+
+inner: ; preds = %bb16, %bb11
+ %t17 = phi i32 [ %i13, %outer ], [ undef, %inner ] ; <i32> [#uses=1]
+ store i32 %t17, i32* undef
+ br i1 undef, label %bb18, label %inner
+
+bb18: ; preds = %bb16
+ %t19 = add i32 %i13, %t9 ; <i32> [#uses=1]
+ %t20 = add i32 %t19, %i12 ; <i32> [#uses=1]
+ %t21 = add i32 %i12, 1 ; <i32> [#uses=1]
+ br label %outer
+}
diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll
new file mode 100644
index 0000000..45a9b0f
--- /dev/null
+++ b/test/CodeGen/X86/int-intrinsic.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @llvm.x86.int(i8) nounwind
+
+; CHECK: int3
+; CHECK: ret
+define void @primitive_int3 () {
+bb.entry:
+ call void @llvm.x86.int(i8 3) nounwind
+ ret void
+}
+
+; CHECK: int $-128
+; CHECK: ret
+define void @primitive_int128 () {
+bb.entry:
+ call void @llvm.x86.int(i8 128) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 71685bb..b0105ac 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2
+; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3
; MachineLICM should be able to hoist the symbolic addresses out of
; the inner loops.
diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll
new file mode 100644
index 0000000..03468e2
--- /dev/null
+++ b/test/CodeGen/X86/lock-inst-encoding.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: f0:
+; CHECK: addq %rax, (%rdi)
+; CHECK: # encoding: [0xf0,0x48,0x01,0x07]
+; CHECK: ret
+define void @f0(i64* %a0) {
+ %t0 = and i64 1, 1
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind
+ %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind
+ ret void
+}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32* nocapture, i32) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 6c0eb8c..6556fde 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
new file mode 100644
index 0000000..4b7050b
--- /dev/null
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu
+
+; The inner loop should require only one add (and no leas either).
+; rdar://8100380
+
+; CHECK: BB0_4:
+; CHECK-NEXT: movb $0, flags(%rdx)
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: cmpq $8192, %rdx
+; CHECK-NEXT: jl
+
+@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+entry:
+ %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb, label %bb21
+
+bb: ; preds = %entry
+ br label %bb7
+
+bb7: ; preds = %bb, %bb17
+ %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2]
+ %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1]
+ br i1 %tmp9, label %bb10, label %bb17
+
+bb10: ; preds = %bb7
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb11
+ %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2]
+ %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp13, align 1
+ %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2]
+ %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1]
+ br i1 %tmp15, label %bb11, label %bb16
+
+bb16: ; preds = %bb11
+ br label %bb17
+
+bb17: ; preds = %bb16, %bb7
+ %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2]
+ %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1]
+ br i1 %tmp19, label %bb7, label %bb20
+
+bb20: ; preds = %bb17
+ br label %bb21
+
+bb21: ; preds = %bb20, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
new file mode 100644
index 0000000..932141d
--- /dev/null
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=x86-64 | grep div | count 1
+; rdar://8168938
+
+; This testcase involves SCEV normalization, where the exit value of
+; one loop is used as the increment value for an addrec in another
+; loop. The expression should be properly normalized and simplified,
+; and require only a single division.
+
+%0 = type { %0*, %0* }
+
+@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1]
+@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1]
+
+define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind {
+bb:
+ %tmp = alloca %0, align 8 ; <%0*> [#uses=11]
+ %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind
+ %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3]
+ store %0* %tmp, %0** %tmp3
+ %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1]
+ store %0* %tmp, %0** %tmp4
+ %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2]
+ %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2]
+ %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1]
+ br i1 %tmp7, label %bb10, label %bb8
+
+bb8: ; preds = %bb
+ %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1]
+ store i32 1, i32* %tmp9
+ br label %bb10
+
+bb10: ; preds = %bb8, %bb
+ %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1]
+ call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind
+ %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3]
+ %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1]
+ br i1 %tmp13, label %bb14, label %bb16
+
+bb14: ; preds = %bb10
+ %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0))
+ br label %bb35
+
+bb16: ; preds = %bb16, %bb10
+ %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1]
+ %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1]
+ %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2]
+ %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1]
+ %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2]
+ br i1 %tmp21, label %bb23, label %bb16
+
+bb23: ; preds = %bb16
+ %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1]
+ br label %bb25
+
+bb25: ; preds = %bb25, %bb23
+ %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1]
+ %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1]
+ %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2]
+ %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1]
+ %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2]
+ br i1 %tmp30, label %bb32, label %bb25
+
+bb32: ; preds = %bb25
+ %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1]
+ %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
+ br label %bb35
+
+bb35: ; preds = %bb32, %bb14
+ %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2]
+ %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1]
+ br i1 %tmp37, label %bb44, label %bb38
+
+bb38: ; preds = %bb38, %bb35
+ %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2]
+ %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2]
+ %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1]
+ call void @_ZdlPv(i8* %tmp42) nounwind
+ %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1]
+ br i1 %tmp43, label %bb44, label %bb38
+
+bb44: ; preds = %bb38, %bb35
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*)
+
+declare noalias i8* @_Znwm(i64)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index b7e69b8..d2ff58b 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_10:
+; CHECK: BB10_9:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,7 @@ bb5: ; preds = %bb3, %entry
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_11:
+; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
new file mode 100644
index 0000000..c9ed3e5
--- /dev/null
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+
+; CHECK: xorl %eax, %eax
+; CHECK: movsd .LCPI0_0(%rip), %xmm0
+; CHECK: align
+; CHECK-NEXT: BB0_2:
+; CHECK-NEXT: movsd A(,%rax,8)
+; CHECK-NEXT: mulsd
+; CHECK-NEXT: movsd
+; CHECK-NEXT: incq %rax
+
+@A = external global [0 x double]
+
+define void @foo(i64 %n) nounwind {
+entry:
+ %cmp5 = icmp sgt i64 %n, 0
+ br i1 %cmp5, label %for.body, label %for.end
+
+for.body:
+ %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06
+ %tmp3 = load double* %arrayidx, align 8
+ %mul = fmul double %tmp3, 2.300000e+00
+ store double %mul, double* %arrayidx, align 8
+ %inc = add nsw i64 %i.06, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll
index ec8db50..d605e4f 100644
--- a/test/CodeGen/X86/lsr-wrap.ll
+++ b/test/CodeGen/X86/lsr-wrap.ll
@@ -3,7 +3,7 @@
; LSR would like to use a single IV for both of these, however it's
; not safe due to wraparound.
-; CHECK: addb $-4, %r
+; CHECK: addb $-4, %
; CHECK: decw %
@g_19 = common global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
deleted file mode 100644
index 796ef7a..0000000
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
- %struct.bf = type { i64, i16, i16, i32 }
-@bfi = external global %struct.bf*
-
-define void @t1() nounwind ssp {
-entry:
-
-; CHECK: andb $-2, 10(
-; CHECK: andb $-3, 10(
-
- %0 = load %struct.bf** @bfi, align 8
- %1 = getelementptr %struct.bf* %0, i64 0, i32 1
- %2 = bitcast i16* %1 to i32*
- %3 = load i32* %2, align 1
- %4 = and i32 %3, -65537
- store i32 %4, i32* %2, align 1
- %5 = load %struct.bf** @bfi, align 8
- %6 = getelementptr %struct.bf* %5, i64 0, i32 1
- %7 = bitcast i16* %6 to i32*
- %8 = load i32* %7, align 1
- %9 = and i32 %8, -131073
- store i32 %9, i32* %7, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 9f9f921..8bed624 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-define i32 @foo(i32 %A, i32 %B, i32 %C) {
+define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind {
entry:
switch i32 %A, label %out [
i32 1, label %bb
diff --git a/test/CodeGen/X86/pr7882.ll b/test/CodeGen/X86/pr7882.ll
new file mode 100644
index 0000000..88404db
--- /dev/null
+++ b/test/CodeGen/X86/pr7882.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \
+; RUN: | FileCheck %s
+; Make sure the scheduler honors the flags clobber. PR 7882.
+
+define i32 @main(i32 %argc, i8** %argv) nounwind
+{
+entry:
+; CHECK: InlineAsm End
+; CHECK: cmpl
+ %res = icmp slt i32 1, %argc
+ %tmp = call i32 asm sideeffect alignstack
+ "push $$0
+ popf
+ mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res)
+ %ret = select i1 %res, i32 %tmp, i32 42
+ ret i32 %ret
+}
diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll
new file mode 100644
index 0000000..10d489b
--- /dev/null
+++ b/test/CodeGen/X86/shl-anyext.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; Codegen should be able to use a 32-bit shift instead of a 64-bit shift.
+; CHECK: shll $16
+
+define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind {
+if.end523: ; preds = %if.end453
+ %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1]
+ %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1]
+ %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1]
+ %or801 = or i64 %shl800, %a ; <i64> [#uses=1]
+ %or806 = or i64 %or801, %b ; <i64> [#uses=1]
+ %or811 = or i64 %or806, %c ; <i64> [#uses=1]
+ %or819 = or i64 %or811, %d ; <i64> [#uses=1]
+ %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1]
+ store i32 %conv820, i32* %p
+ ret void
+}
+
+; CHECK: foo:
+
+declare void @bar(i64)
+
+define fastcc void @foo(i32 %t) {
+bb:
+ %tmp = add i32 %t, -1 ; <i32> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2]
+ %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1]
+ %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1]
+ %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1]
+ %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1]
+ %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1]
+ %tmp8 = and i64 %tmp7, 4294967288 ; <i64> [#uses=1]
+ %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1]
+ call void @bar(i64 %tmp9)
+ ret void
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 4b27f2e..a3c9957 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
+; Darwin 8 generates stubs, which don't match the expected output.
+; XFAIL: apple-darwin8
define void @t1(i32 %x) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index ebcdc65..348121a 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
; Some of these patterns can be matched as SSE min or max. Some of
; then can be matched provided that the operands are swapped.
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
new file mode 100644
index 0000000..73f88ae
--- /dev/null
+++ b/test/CodeGen/X86/sse1.ll
@@ -0,0 +1,45 @@
+; Tests for SSE1 and below, without SSE2+.
+; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s
+
+define <8 x i16> @test1(<8 x i32> %a) nounwind {
+; CHECK: test1
+ ret <8 x i16> zeroinitializer
+}
+
+define <8 x i16> @test2(<8 x i32> %a) nounwind {
+; CHECK: test2
+ %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %c
+}
+
+; PR7993
+;define <4 x i32> @test3(<4 x i16> %a) nounwind {
+; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1]
+; ret <4 x i32> %c
+;}
+
+; This should not emit shuffles to populate the top 2 elements of the 4-element
+; vector that this ends up returning.
+; rdar://8368414
+define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fsub float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; CHECK: test4:
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: unpcklps
+; CHECK-NOT: shufps $16
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 20b8eac..6fc0190 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -1,14 +1,14 @@
; Tests for SSE2 and below, without SSE3+.
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
-define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t1:
+; CHECK: test1:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movlpd 12(%esp), %xmm0
@@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: ret
}
-define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t2:
+; CHECK: test2:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movhpd 12(%esp), %xmm0
@@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
+
+
+define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
+ %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
+ %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
+ %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
+ %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
+ %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
+ %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp13, <4 x float>* %res
+ ret void
+; CHECK: @test3
+; CHECK: unpcklps
+}
+
+define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
+ %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp5, <4 x float>* %res
+ ret void
+; CHECK: @test4
+; CHECK: pshufd $50, %xmm0, %xmm0
+}
+
+define <4 x i32> @test5(i8** %ptr) nounwind {
+; CHECK: test5:
+; CHECK: pxor
+; CHECK: punpcklbw
+; CHECK: punpcklwd
+
+ %tmp = load i8** %ptr ; <i8*> [#uses=1]
+ %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
+ %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
+ %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
+ %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
+ %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
+ %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp36
+}
+
+define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
+ %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp2, <4 x float>* %res
+ ret void
+
+; CHECK: test6:
+; CHECK: movaps (%eax), %xmm0
+; CHECK: movaps %xmm0, (%eax)
+}
+
+define void @test7() nounwind {
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
+ shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
+ store <4 x float> %2, <4 x float>* null
+ ret void
+
+; CHECK: test7:
+; CHECK: pxor %xmm0, %xmm0
+; CHECK: movaps %xmm0, 0
+}
+
+@x = external global [4 x i32]
+
+define <2 x i64> @test8() nounwind {
+ %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
+ %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
+ %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
+ %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
+ %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp16
+; CHECK: test8:
+; CHECK: movups (%eax), %xmm0
+}
+
+define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test9:
+; CHECK: movups 8(%esp), %xmm0
+}
+
+define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test10:
+; CHECK: movaps 4(%esp), %xmm0
+}
+
+define <2 x double> @test11(double %a, double %b) nounwind {
+ %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
+ %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
+ ret <2 x double> %tmp7
+; CHECK: test11:
+; CHECK: movapd 4(%esp), %xmm0
+}
+
+define void @test12() nounwind {
+ %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp4, <4 x float>* null
+ ret void
+; CHECK: test12:
+; CHECK: movhlps
+; CHECK: shufps
+}
+
+define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+ %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
+ %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
+ %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp11, <4 x float>* %res
+ ret void
+; CHECK: test13
+; CHECK: shufps $69, (%eax), %xmm0
+; CHECK: pshufd $-40, %xmm0, %xmm0
+}
+
+define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
+ %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
+ %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp27
+; CHECK: test14:
+; CHECK: addps %xmm1, %xmm0
+; CHECK: subps %xmm1, %xmm2
+; CHECK: movlhps %xmm2, %xmm0
+}
+
+define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
+entry:
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp4
+; CHECK: test15:
+; CHECK: movhlps %xmm1, %xmm0
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ef66d1a..3a14fa2 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fadd float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
deleted file mode 100644
index 001a540..0000000
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ /dev/null
@@ -1,361 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep asm-printer %t | grep 166
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
-
- type { [62 x %struct.Bitvec*] } ; type %0
- type { i8* } ; type %1
- type { double } ; type %2
- %struct..5sPragmaType = type { i8*, i32 }
- %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
- %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
- %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
- %struct.AuxData = type { i8*, void (i8*)* }
- %struct.Bitvec = type { i32, i32, i32, %0 }
- %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
- %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
- %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
- %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
- %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
- %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
- %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
- %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
- %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
- %struct.Context = type { i64, i32, %struct.Fifo }
- %struct.CountCtx = type { i64 }
- %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
- %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
- %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
- %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
- %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
- %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
- %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
- %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
- %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
- %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
- %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
- %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
- %struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
- %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
- %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
- %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
- %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
- %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
- %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
- %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
- %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
- %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
- %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
- %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
- %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
- %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
- %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
- %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
- %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
- %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
- %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
- %struct._OvflCell = type { i8*, i16 }
- %struct._ht = type { i32, %struct.HashElem* }
- %struct.sColMap = type { i32, i8* }
- %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
- %struct.sqlite3InitInfo = type { i32, i32, i8 }
- %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
- %struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
- %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
- %struct.sqlite3_index_constraint_usage = type { i32, i8 }
- %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
- %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
- %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
- %struct.sqlite3_mutex = type opaque
- %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
- %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
- %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
-entry:
- %0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18 ; <i8**> [#uses=1]
- %1 = load i8** %0, align 8 ; <i8*> [#uses=34]
- %2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12 ; <i16*> [#uses=1]
- %3 = load i16* %2, align 2 ; <i16> [#uses=1]
- %4 = zext i16 %3 to i32 ; <i32> [#uses=2]
- %5 = shl i32 %idx, 1 ; <i32> [#uses=2]
- %6 = add i32 %4, %5 ; <i32> [#uses=1]
- %7 = sext i32 %6 to i64 ; <i64> [#uses=2]
- %8 = getelementptr i8* %1, i64 %7 ; <i8*> [#uses=1]
- %9 = load i8* %8, align 1 ; <i8> [#uses=2]
- %10 = zext i8 %9 to i32 ; <i32> [#uses=1]
- %11 = shl i32 %10, 8 ; <i32> [#uses=1]
- %.sum3 = add i64 %7, 1 ; <i64> [#uses=1]
- %12 = getelementptr i8* %1, i64 %.sum3 ; <i8*> [#uses=1]
- %13 = load i8* %12, align 1 ; <i8> [#uses=2]
- %14 = zext i8 %13 to i32 ; <i32> [#uses=1]
- %15 = or i32 %11, %14 ; <i32> [#uses=3]
- %16 = icmp slt i32 %sz, 4 ; <i1> [#uses=1]
- %size_addr.0.i = select i1 %16, i32 4, i32 %sz ; <i32> [#uses=3]
- %17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8 ; <i8*> [#uses=5]
- %18 = load i8* %17, align 8 ; <i8> [#uses=1]
- %19 = zext i8 %18 to i32 ; <i32> [#uses=4]
- %20 = add i32 %19, 1 ; <i32> [#uses=2]
- br label %bb3.i
-
-bb3.i: ; preds = %bb3.i, %entry
- %addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ] ; <i32> [#uses=1]
- %21 = sext i32 %addr.0.i to i64 ; <i64> [#uses=2]
- %22 = getelementptr i8* %1, i64 %21 ; <i8*> [#uses=2]
- %23 = load i8* %22, align 1 ; <i8> [#uses=2]
- %24 = zext i8 %23 to i32 ; <i32> [#uses=1]
- %25 = shl i32 %24, 8 ; <i32> [#uses=1]
- %.sum34.i = add i64 %21, 1 ; <i64> [#uses=1]
- %26 = getelementptr i8* %1, i64 %.sum34.i ; <i8*> [#uses=2]
- %27 = load i8* %26, align 1 ; <i8> [#uses=2]
- %28 = zext i8 %27 to i32 ; <i32> [#uses=1]
- %29 = or i32 %25, %28 ; <i32> [#uses=3]
- %.not.i = icmp uge i32 %29, %15 ; <i1> [#uses=1]
- %30 = icmp eq i32 %29, 0 ; <i1> [#uses=1]
- %or.cond.i = or i1 %30, %.not.i ; <i1> [#uses=1]
- br i1 %or.cond.i, label %bb5.i, label %bb3.i
-
-bb5.i: ; preds = %bb3.i
- store i8 %9, i8* %22, align 1
- store i8 %13, i8* %26, align 1
- %31 = zext i32 %15 to i64 ; <i64> [#uses=2]
- %32 = getelementptr i8* %1, i64 %31 ; <i8*> [#uses=1]
- store i8 %23, i8* %32, align 1
- %.sum32.i = add i64 %31, 1 ; <i64> [#uses=1]
- %33 = getelementptr i8* %1, i64 %.sum32.i ; <i8*> [#uses=1]
- store i8 %27, i8* %33, align 1
- %34 = add i32 %15, 2 ; <i32> [#uses=1]
- %35 = zext i32 %34 to i64 ; <i64> [#uses=2]
- %36 = getelementptr i8* %1, i64 %35 ; <i8*> [#uses=1]
- %37 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1]
- %38 = trunc i32 %37 to i8 ; <i8> [#uses=1]
- store i8 %38, i8* %36, align 1
- %39 = trunc i32 %size_addr.0.i to i8 ; <i8> [#uses=1]
- %.sum31.i = add i64 %35, 1 ; <i64> [#uses=1]
- %40 = getelementptr i8* %1, i64 %.sum31.i ; <i8*> [#uses=1]
- store i8 %39, i8* %40, align 1
- %41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14 ; <i16*> [#uses=4]
- %42 = load i16* %41, align 2 ; <i16> [#uses=1]
- %43 = trunc i32 %size_addr.0.i to i16 ; <i16> [#uses=1]
- %44 = add i16 %42, %43 ; <i16> [#uses=1]
- store i16 %44, i16* %41, align 2
- %45 = load i8* %17, align 8 ; <i8> [#uses=1]
- %46 = zext i8 %45 to i32 ; <i32> [#uses=1]
- %47 = add i32 %46, 1 ; <i32> [#uses=1]
- br label %bb11.outer.i
-
-bb11.outer.i: ; preds = %bb6.i, %bb5.i
- %addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ] ; <i32> [#uses=1]
- %48 = sext i32 %addr.1.ph.i to i64 ; <i64> [#uses=2]
- %49 = getelementptr i8* %1, i64 %48 ; <i8*> [#uses=1]
- %.sum30.i = add i64 %48, 1 ; <i64> [#uses=1]
- %50 = getelementptr i8* %1, i64 %.sum30.i ; <i8*> [#uses=1]
- br label %bb11.i
-
-bb6.i: ; preds = %bb11.i
- %51 = zext i32 %111 to i64 ; <i64> [#uses=2]
- %52 = getelementptr i8* %1, i64 %51 ; <i8*> [#uses=2]
- %53 = load i8* %52, align 1 ; <i8> [#uses=1]
- %54 = zext i8 %53 to i32 ; <i32> [#uses=1]
- %55 = shl i32 %54, 8 ; <i32> [#uses=1]
- %.sum24.i = add i64 %51, 1 ; <i64> [#uses=1]
- %56 = getelementptr i8* %1, i64 %.sum24.i ; <i8*> [#uses=2]
- %57 = load i8* %56, align 1 ; <i8> [#uses=3]
- %58 = zext i8 %57 to i32 ; <i32> [#uses=1]
- %59 = or i32 %55, %58 ; <i32> [#uses=5]
- %60 = add i32 %111, 2 ; <i32> [#uses=1]
- %61 = zext i32 %60 to i64 ; <i64> [#uses=2]
- %62 = getelementptr i8* %1, i64 %61 ; <i8*> [#uses=2]
- %63 = load i8* %62, align 1 ; <i8> [#uses=1]
- %64 = zext i8 %63 to i32 ; <i32> [#uses=1]
- %65 = shl i32 %64, 8 ; <i32> [#uses=1]
- %.sum23.i = add i64 %61, 1 ; <i64> [#uses=1]
- %66 = getelementptr i8* %1, i64 %.sum23.i ; <i8*> [#uses=2]
- %67 = load i8* %66, align 1 ; <i8> [#uses=2]
- %68 = zext i8 %67 to i32 ; <i32> [#uses=1]
- %69 = or i32 %65, %68 ; <i32> [#uses=1]
- %70 = add i32 %111, 3 ; <i32> [#uses=1]
- %71 = add i32 %70, %69 ; <i32> [#uses=1]
- %72 = icmp sge i32 %71, %59 ; <i1> [#uses=1]
- %73 = icmp ne i32 %59, 0 ; <i1> [#uses=1]
- %74 = and i1 %72, %73 ; <i1> [#uses=1]
- br i1 %74, label %bb9.i, label %bb11.outer.i
-
-bb9.i: ; preds = %bb6.i
- %75 = load i8* %17, align 8 ; <i8> [#uses=1]
- %76 = zext i8 %75 to i32 ; <i32> [#uses=1]
- %77 = add i32 %76, 7 ; <i32> [#uses=1]
- %78 = zext i32 %77 to i64 ; <i64> [#uses=1]
- %79 = getelementptr i8* %1, i64 %78 ; <i8*> [#uses=2]
- %80 = load i8* %79, align 1 ; <i8> [#uses=1]
- %81 = sub i8 %109, %57 ; <i8> [#uses=1]
- %82 = add i8 %81, %67 ; <i8> [#uses=1]
- %83 = add i8 %82, %80 ; <i8> [#uses=1]
- store i8 %83, i8* %79, align 1
- %84 = zext i32 %59 to i64 ; <i64> [#uses=2]
- %85 = getelementptr i8* %1, i64 %84 ; <i8*> [#uses=1]
- %86 = load i8* %85, align 1 ; <i8> [#uses=1]
- store i8 %86, i8* %52, align 1
- %.sum22.i = add i64 %84, 1 ; <i64> [#uses=1]
- %87 = getelementptr i8* %1, i64 %.sum22.i ; <i8*> [#uses=1]
- %88 = load i8* %87, align 1 ; <i8> [#uses=1]
- store i8 %88, i8* %56, align 1
- %89 = add i32 %59, 2 ; <i32> [#uses=1]
- %90 = zext i32 %89 to i64 ; <i64> [#uses=2]
- %91 = getelementptr i8* %1, i64 %90 ; <i8*> [#uses=1]
- %92 = load i8* %91, align 1 ; <i8> [#uses=1]
- %93 = zext i8 %92 to i32 ; <i32> [#uses=1]
- %94 = shl i32 %93, 8 ; <i32> [#uses=1]
- %.sum20.i = add i64 %90, 1 ; <i64> [#uses=1]
- %95 = getelementptr i8* %1, i64 %.sum20.i ; <i8*> [#uses=2]
- %96 = load i8* %95, align 1 ; <i8> [#uses=1]
- %97 = zext i8 %96 to i32 ; <i32> [#uses=1]
- %98 = or i32 %94, %97 ; <i32> [#uses=1]
- %99 = sub i32 %59, %111 ; <i32> [#uses=1]
- %100 = add i32 %99, %98 ; <i32> [#uses=1]
- %101 = lshr i32 %100, 8 ; <i32> [#uses=1]
- %102 = trunc i32 %101 to i8 ; <i8> [#uses=1]
- store i8 %102, i8* %62, align 1
- %103 = load i8* %95, align 1 ; <i8> [#uses=1]
- %104 = sub i8 %57, %109 ; <i8> [#uses=1]
- %105 = add i8 %104, %103 ; <i8> [#uses=1]
- store i8 %105, i8* %66, align 1
- br label %bb11.i
-
-bb11.i: ; preds = %bb9.i, %bb11.outer.i
- %106 = load i8* %49, align 1 ; <i8> [#uses=1]
- %107 = zext i8 %106 to i32 ; <i32> [#uses=1]
- %108 = shl i32 %107, 8 ; <i32> [#uses=1]
- %109 = load i8* %50, align 1 ; <i8> [#uses=3]
- %110 = zext i8 %109 to i32 ; <i32> [#uses=1]
- %111 = or i32 %108, %110 ; <i32> [#uses=6]
- %112 = icmp eq i32 %111, 0 ; <i1> [#uses=1]
- br i1 %112, label %bb12.i, label %bb6.i
-
-bb12.i: ; preds = %bb11.i
- %113 = zext i32 %20 to i64 ; <i64> [#uses=2]
- %114 = getelementptr i8* %1, i64 %113 ; <i8*> [#uses=2]
- %115 = load i8* %114, align 1 ; <i8> [#uses=2]
- %116 = add i32 %19, 5 ; <i32> [#uses=1]
- %117 = zext i32 %116 to i64 ; <i64> [#uses=2]
- %118 = getelementptr i8* %1, i64 %117 ; <i8*> [#uses=3]
- %119 = load i8* %118, align 1 ; <i8> [#uses=1]
- %120 = icmp eq i8 %115, %119 ; <i1> [#uses=1]
- br i1 %120, label %bb13.i, label %bb1.preheader
-
-bb13.i: ; preds = %bb12.i
- %121 = add i32 %19, 2 ; <i32> [#uses=1]
- %122 = zext i32 %121 to i64 ; <i64> [#uses=1]
- %123 = getelementptr i8* %1, i64 %122 ; <i8*> [#uses=1]
- %124 = load i8* %123, align 1 ; <i8> [#uses=1]
- %125 = add i32 %19, 6 ; <i32> [#uses=1]
- %126 = zext i32 %125 to i64 ; <i64> [#uses=1]
- %127 = getelementptr i8* %1, i64 %126 ; <i8*> [#uses=1]
- %128 = load i8* %127, align 1 ; <i8> [#uses=1]
- %129 = icmp eq i8 %124, %128 ; <i1> [#uses=1]
- br i1 %129, label %bb14.i, label %bb1.preheader
-
-bb14.i: ; preds = %bb13.i
- %130 = zext i8 %115 to i32 ; <i32> [#uses=1]
- %131 = shl i32 %130, 8 ; <i32> [#uses=1]
- %.sum29.i = add i64 %113, 1 ; <i64> [#uses=1]
- %132 = getelementptr i8* %1, i64 %.sum29.i ; <i8*> [#uses=1]
- %133 = load i8* %132, align 1 ; <i8> [#uses=1]
- %134 = zext i8 %133 to i32 ; <i32> [#uses=1]
- %135 = or i32 %134, %131 ; <i32> [#uses=2]
- %136 = zext i32 %135 to i64 ; <i64> [#uses=1]
- %137 = getelementptr i8* %1, i64 %136 ; <i8*> [#uses=1]
- %138 = bitcast i8* %137 to i16* ; <i16*> [#uses=1]
- %139 = bitcast i8* %114 to i16* ; <i16*> [#uses=1]
- %tmp.i = load i16* %138, align 1 ; <i16> [#uses=1]
- store i16 %tmp.i, i16* %139, align 1
- %140 = load i8* %118, align 1 ; <i8> [#uses=1]
- %141 = zext i8 %140 to i32 ; <i32> [#uses=1]
- %142 = shl i32 %141, 8 ; <i32> [#uses=1]
- %.sum28.i = add i64 %117, 1 ; <i64> [#uses=1]
- %143 = getelementptr i8* %1, i64 %.sum28.i ; <i8*> [#uses=2]
- %144 = load i8* %143, align 1 ; <i8> [#uses=2]
- %145 = zext i8 %144 to i32 ; <i32> [#uses=1]
- %146 = or i32 %142, %145 ; <i32> [#uses=1]
- %147 = add i32 %135, 2 ; <i32> [#uses=1]
- %148 = zext i32 %147 to i64 ; <i64> [#uses=2]
- %149 = getelementptr i8* %1, i64 %148 ; <i8*> [#uses=1]
- %150 = load i8* %149, align 1 ; <i8> [#uses=1]
- %151 = zext i8 %150 to i32 ; <i32> [#uses=1]
- %152 = shl i32 %151, 8 ; <i32> [#uses=1]
- %.sum27.i = add i64 %148, 1 ; <i64> [#uses=1]
- %153 = getelementptr i8* %1, i64 %.sum27.i ; <i8*> [#uses=2]
- %154 = load i8* %153, align 1 ; <i8> [#uses=1]
- %155 = zext i8 %154 to i32 ; <i32> [#uses=1]
- %156 = or i32 %152, %155 ; <i32> [#uses=1]
- %157 = add i32 %156, %146 ; <i32> [#uses=1]
- %158 = lshr i32 %157, 8 ; <i32> [#uses=1]
- %159 = trunc i32 %158 to i8 ; <i8> [#uses=1]
- store i8 %159, i8* %118, align 1
- %160 = load i8* %153, align 1 ; <i8> [#uses=1]
- %161 = add i8 %160, %144 ; <i8> [#uses=1]
- store i8 %161, i8* %143, align 1
- br label %bb1.preheader
-
-bb1.preheader: ; preds = %bb14.i, %bb13.i, %bb12.i
- %i.08 = add i32 %idx, 1 ; <i32> [#uses=2]
- %162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15 ; <i16*> [#uses=4]
- %163 = load i16* %162, align 4 ; <i16> [#uses=2]
- %164 = zext i16 %163 to i32 ; <i32> [#uses=1]
- %165 = icmp sgt i32 %164, %i.08 ; <i1> [#uses=1]
- br i1 %165, label %bb, label %bb2
-
-bb: ; preds = %bb, %bb1.preheader
- %indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
- %tmp16 = add i32 %5, %4 ; <i32> [#uses=1]
- %tmp.17 = sext i32 %tmp16 to i64 ; <i64> [#uses=1]
- %tmp19 = shl i64 %indvar, 1 ; <i64> [#uses=1]
- %ctg2.sum = add i64 %tmp.17, %tmp19 ; <i64> [#uses=4]
- %ctg229 = getelementptr i8* %1, i64 %ctg2.sum ; <i8*> [#uses=1]
- %ctg229.sum31 = add i64 %ctg2.sum, 2 ; <i64> [#uses=1]
- %166 = getelementptr i8* %1, i64 %ctg229.sum31 ; <i8*> [#uses=1]
- %167 = load i8* %166, align 1 ; <i8> [#uses=1]
- store i8 %167, i8* %ctg229
- %ctg229.sum30 = add i64 %ctg2.sum, 3 ; <i64> [#uses=1]
- %168 = getelementptr i8* %1, i64 %ctg229.sum30 ; <i8*> [#uses=1]
- %169 = load i8* %168, align 1 ; <i8> [#uses=1]
- %ctg229.sum = add i64 %ctg2.sum, 1 ; <i64> [#uses=1]
- %170 = getelementptr i8* %1, i64 %ctg229.sum ; <i8*> [#uses=1]
- store i8 %169, i8* %170, align 1
- %indvar15 = trunc i64 %indvar to i32 ; <i32> [#uses=1]
- %i.09 = add i32 %indvar15, %i.08 ; <i32> [#uses=1]
- %i.0 = add i32 %i.09, 1 ; <i32> [#uses=1]
- %171 = load i16* %162, align 4 ; <i16> [#uses=2]
- %172 = zext i16 %171 to i32 ; <i32> [#uses=1]
- %173 = icmp sgt i32 %172, %i.0 ; <i1> [#uses=1]
- %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
- br i1 %173, label %bb, label %bb2
-
-bb2: ; preds = %bb, %bb1.preheader
- %174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ] ; <i16> [#uses=1]
- %175 = add i16 %174, -1 ; <i16> [#uses=2]
- store i16 %175, i16* %162, align 4
- %176 = load i8* %17, align 8 ; <i8> [#uses=1]
- %177 = zext i8 %176 to i32 ; <i32> [#uses=1]
- %178 = add i32 %177, 3 ; <i32> [#uses=1]
- %179 = zext i32 %178 to i64 ; <i64> [#uses=1]
- %180 = getelementptr i8* %1, i64 %179 ; <i8*> [#uses=1]
- %181 = lshr i16 %175, 8 ; <i16> [#uses=1]
- %182 = trunc i16 %181 to i8 ; <i8> [#uses=1]
- store i8 %182, i8* %180, align 1
- %183 = load i8* %17, align 8 ; <i8> [#uses=1]
- %184 = zext i8 %183 to i32 ; <i32> [#uses=1]
- %185 = add i32 %184, 3 ; <i32> [#uses=1]
- %186 = zext i32 %185 to i64 ; <i64> [#uses=1]
- %187 = load i16* %162, align 4 ; <i16> [#uses=1]
- %188 = trunc i16 %187 to i8 ; <i8> [#uses=1]
- %.sum = add i64 %186, 1 ; <i64> [#uses=1]
- %189 = getelementptr i8* %1, i64 %.sum ; <i8*> [#uses=1]
- store i8 %188, i8* %189, align 1
- %190 = load i16* %41, align 2 ; <i16> [#uses=1]
- %191 = add i16 %190, 2 ; <i16> [#uses=1]
- store i16 %191, i16* %41, align 2
- %192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1]
- store i8 1, i8* %192, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index 70204bc..a7c2517 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -2,7 +2,7 @@
; PR5851
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
%0 = type { void (...)* }
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 5682e7c..abc5174 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -1,6 +1,6 @@
; rdar://7860110
-; RUN: llc < %s | FileCheck %s -check-prefix=X64
-; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.2"
@@ -125,3 +125,30 @@ entry:
; X32: movb %cl, 5(%{{.*}})
}
+; PR7833
+
+@g_16 = internal global i32 -1
+
+; X64: test8:
+; X64-NEXT: movl _g_16(%rip), %eax
+; X64-NEXT: movl $0, _g_16(%rip)
+; X64-NEXT: orl $1, %eax
+; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: ret
+define void @test8() nounwind {
+ %tmp = load i32* @g_16
+ store i32 0, i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
+
+; X64: test9:
+; X64-NEXT: orb $1, _g_16(%rip)
+; X64-NEXT: ret
+define void @test9() nounwind {
+ %tmp = load i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index d54fb41..7f92af4 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -1,8 +1,6 @@
-; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL
+; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL
-; Fast-isel shouldn't attempt to handle this tail call, and it should
-; cleanly terminate instruction selection in the block after it's
-; done to avoid emitting invalid MachineInstrs.
+; Fast-isel shouldn't attempt to cope with tail calls.
%0 = type { i64, i32, i8* }
@@ -11,3 +9,11 @@ fail: ; preds = %entry
%tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
ret i8* %tmp20
}
+
+define i32 @foo() nounwind {
+entry:
+ %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 4c37225..6f6d6f2 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -3,7 +3,7 @@
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
-define i32 @main() nounwind {
+define i32 @foo() nounwind {
bb1.thread:
br label %bb1
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index 9c4b773..76c3fdf 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
store float %c, float* %P2
ret void
; X64: test1:
-; X64-NEXT: addss %xmm1, %xmm0
-; X64-NEXT: movss %xmm0, (%rdi)
+; X64-NEXT: pshufd $1, %xmm0, %xmm1
+; X64-NEXT: addss %xmm0, %xmm1
+; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret
; X32: test1:
-; X32-NEXT: movss 4(%esp), %xmm0
-; X32-NEXT: addss 8(%esp), %xmm0
-; X32-NEXT: movl 12(%esp), %eax
-; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: pshufd $1, %xmm0, %xmm1
+; X32-NEXT: addss %xmm0, %xmm1
+; X32-NEXT: movl 4(%esp), %eax
+; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
}
@@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
ret <2 x float> %Z
; X64: test2:
-; X64-NEXT: insertps $0
-; X64-NEXT: insertps $16
-; X64-NEXT: insertps $0
-; X64-NEXT: insertps $16
-; X64-NEXT: addps
-; X64-NEXT: movaps
-; X64-NEXT: pshufd
+; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret
}
+
+
+define <2 x float> @test3(<4 x float> %A) nounwind {
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %C = fadd <2 x float> %B, %B
+ ret <2 x float> %C
+; CHECK: test3:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+define <2 x float> @test4(<2 x float> %A) nounwind {
+ %C = fadd <2 x float> %A, %A
+ ret <2 x float> %C
+; CHECK: test4:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+define <4 x float> @test5(<4 x float> %A) nounwind {
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %C = fadd <2 x float> %B, %B
+ br label %BB
+
+BB:
+ %D = fadd <2 x float> %C, %C
+ %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x float> %E
+
+; CHECK: _test5:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index 6f18d13..f853164 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,15 +1,16 @@
-; RUN: llc < %s -march=x86-64
-; RUN: llc < %s -march=x86-64 -disable-mmx
+; RUN: llc < %s -march=x86-64 -mcpu=core2
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx
+
define <8 x i32> @a(<8 x i16> %a) nounwind {
%c = sext <8 x i16> %a to <8 x i32>
ret <8 x i32> %c
}
-define <3 x i32> @b(<3 x i16> %a) nounwind {
- %c = sext <3 x i16> %a to <3 x i32>
- ret <3 x i32> %c
-}
+;define <3 x i32> @b(<3 x i16> %a) nounwind {
+; %c = sext <3 x i16> %a to <3 x i32>
+; ret <3 x i32> %c
+;}
define <1 x i32> @c(<1 x i16> %a) nounwind {
%c = sext <1 x i16> %a to <1 x i32>
@@ -21,10 +22,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwind {
ret <8 x i32> %c
}
-define <3 x i32> @e(<3 x i16> %a) nounwind {
- %c = zext <3 x i16> %a to <3 x i32>
- ret <3 x i32> %c
-}
+;define <3 x i32> @e(<3 x i16> %a) nounwind {
+; %c = zext <3 x i16> %a to <3 x i32>
+; ret <3 x i32> %c
+;}
define <1 x i32> @f(<1 x i16> %a) nounwind {
%c = zext <1 x i16> %a to <1 x i32>
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 54aa43f..de3b36f 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
define <4 x float> @t3(<4 x float>* %P) nounwind {
%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll
index 2e829df..e5a7ccc 100644
--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
new file mode 100644
index 0000000..9ef7fbd
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shll
+; CHECK: pslld
+; CHECK: paddd
+; CHECK: cvttps2dq
+; CHECK: pmulld
+
+ %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
+ %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+ %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1]
+ %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
deleted file mode 100644
index a63e386..0000000
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep unpcklps %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: not grep {sub.*esp} %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
- %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
- %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
- %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
- %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
- %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp13, <4 x float>* %res
- ret void
-}
-
-define void @test2(<4 x float> %X, <4 x float>* %res) {
- %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp5, <4 x float>* %res
- ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 9fc09df..861a1cc 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 6d1bac0..fc06b95 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 7562f1d..1b104de 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
define i32 @t() nounwind optsize {
entry:
+; CHECK: punpckldq
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
deleted file mode 100644
index f4930b0..0000000
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movlhps %t | count 1
-; RUN: grep movhlps %t | count 1
-
-define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
- %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
- %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp27
-}
-
-define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) {
-entry:
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
- %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp4
-}
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
new file mode 100644
index 0000000..1ed858d
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
+entry:
+; CHECK: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: movlps (%rax), %xmm1
+; CHECK-NEXT: shufps $36, %xmm1, %xmm0
+ %0 = load <4 x i32>* undef, align 16
+ %1 = load <4 x i32>* %a0, align 16
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %2
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
deleted file mode 100644
index 829fedf..0000000
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep shuf %t | count 2
-; RUN: not grep unpck %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) {
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
- %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
- %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp11, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
deleted file mode 100644
index c24167a..0000000
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movhlps %t | count 1
-; RUN: grep shufps %t | count 1
-
-define void @test() nounwind {
- %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp4, <4 x float>* null
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
deleted file mode 100644
index 28fd59b..0000000
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movapd %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: grep movups %t | count 2
-
-target triple = "i686-apple-darwin"
-@x = external global [4 x i32]
-
-define <2 x i64> @test1() {
- %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
- %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
- %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
- %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
- %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %tmp16
-}
-
-define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <4 x float> @test3(float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <2 x double> @test4(double %a, double %b) {
- %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
- %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
- ret <2 x double> %tmp7
-}
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
deleted file mode 100644
index 64bd6a3..0000000
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep pxor %t | count 1
-; RUN: not grep shufps %t
-
-define void @test() {
- bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
- shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
- store <4 x float> %2, <4 x float>* null
- unreachable
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
deleted file mode 100644
index 964ce7b..0000000
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | \
-; RUN: not grep shufps
-
-define void @test(<4 x float>* %res, <4 x float>* %A) {
- %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp2, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
deleted file mode 100644
index 0719586..0000000
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
-
-define <4 x i32> @test(i8** %ptr) {
-; CHECK: pxor
-; CHECK: punpcklbw
-; CHECK: punpcklwd
-
- %tmp = load i8** %ptr ; <i8*> [#uses=1]
- %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
- %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
- %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
- %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
- %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
- %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
- %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %tmp36
-}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 25dde57..463f522 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -3,7 +3,8 @@
; widening shuffle v3float and then an add
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
-; CHECK: insertps
+; CHECK: shuf:
+; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
%val = fadd <3 x float> %x, %src2
@@ -15,7 +16,8 @@ entry:
; widening shuffle v3float with a different mask and then an add
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
-; CHECK: insertps
+; CHECK: shuf2:
+; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
%val = fadd <3 x float> %x, %src2
@@ -26,7 +28,7 @@ entry:
; Example of when widening a v3float operation causes the DAG to replace a node
; with the operation that we are currently widening, i.e. when replacing
; opA with opB, the DAG will produce new operations with opA.
-define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
+define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: pshufd
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
new file mode 100644
index 0000000..27d3358
--- /dev/null
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; Windows and mingw require a prologue helper routine if more than 4096 bytes are
+; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca
+; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer.
+; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit
+; prologue is responsible for adjusting the stack pointer.
+
+; Stack allocation >= 4096 bytes will require a call to __chkstk in the Windows ABI.
+define i32 @main4k() nounwind {
+entry:
+; WIN_X32: call __chkstk
+; WIN_X64: call __chkstk
+; MINGW_X32: call __alloca
+; MINGW_X64: call _alloca
+; LINUX-NOT: call __chkstk
+ %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
+ ret i32 0
+}
+
+; Make sure we don't call __chkstk or __alloca when we have less than a 4096-byte stack
+; allocation.
+define i32 @main128() nounwind {
+entry:
+; WIN_X32: # BB#0:
+; WIN_X32-NOT: call __chkstk
+; WIN_X32: ret
+
+; WIN_X64: # BB#0:
+; WIN_X64-NOT: call __chkstk
+; WIN_X64: ret
+
+; MINGW_X64: # BB#0:
+; MINGW_X64-NOT: call _alloca
+; MINGW_X64: ret
+
+; LINUX: # BB#0:
+; LINUX-NOT: call __chkstk
+; LINUX: ret
+ %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0]
+ ret i32 0
+}
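Editorial note on the comment block in the new win_chkstk.ll above: it states that the probing threshold is 4096 bytes. The following is a minimal sketch in the same LLVM IR style, not part of the imported patch; the function name and the 4100-byte size are illustrative assumptions, showing a frame just over one page that the comment implies would also go through the prologue helper on the Windows and mingw triples.

define i32 @just_over_one_page() nounwind {
entry:
  ; just over the 4096-byte threshold described above, so the Windows/mingw
  ; prologue would be expected to call its helper (__chkstk or __alloca)
  %array4100 = alloca [4100 x i8], align 16   ; <[4100 x i8]*> [#uses=0]
  ret i32 0
}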
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 3e3bb95..4470074 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
declare void @bar(double %x)