Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--  test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll | 41
-rw-r--r--  test/CodeGen/X86/2007-05-15-maskmovq.ll | 8
-rw-r--r--  test/CodeGen/X86/2007-06-15-IntToMMX.ll | 13
-rw-r--r--  test/CodeGen/X86/2007-07-03-GR64ToVR64.ll | 14
-rw-r--r--  test/CodeGen/X86/2007-10-16-fp80_select.ll | 19
-rw-r--r--  test/CodeGen/X86/2008-02-18-TailMergingBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-04-08-CoalescerCrash.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-07-19-movups-spills.ll | 3
-rw-r--r--  test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll | 6
-rw-r--r--  test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 22
-rw-r--r--  test/CodeGen/X86/2008-09-17-inline-asm-1.ll | 16
-rw-r--r--  test/CodeGen/X86/2008-10-27-CoalescerBug.ll | 10
-rw-r--r--  test/CodeGen/X86/2008-10-27-StackRealignment.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll | 9
-rw-r--r--  test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll | 9
-rw-r--r--  test/CodeGen/X86/2009-01-13-DoubleUpdate.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-01-27-NullStrings.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-04-24.ll | 3
-rw-r--r--  test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll | 9
-rw-r--r--  test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 14
-rw-r--r--  test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-07-07-SplitICmp.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll | 8
-rw-r--r--  test/CodeGen/X86/2009-08-06-inlineasm.ll | 6
-rw-r--r--  test/CodeGen/X86/2009-09-10-SpillComments.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll | 42
-rw-r--r--  test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll | 64
-rw-r--r--  test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 7
-rw-r--r--  test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-05-28-Crash.ll | 6
-rw-r--r--  test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll | 8
-rw-r--r--  test/CodeGen/X86/2010-07-02-asm-alignstack.ll | 4
-rw-r--r--  test/CodeGen/X86/2010-09-16-EmptyFilename.ll | 29
-rw-r--r--  test/CodeGen/X86/2010-09-16-asmcrash.ll | 56
-rw-r--r--  test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 26
-rw-r--r--  test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll | 71
-rw-r--r--  test/CodeGen/X86/2010-10-08-cmpxchg8b.ll | 28
-rw-r--r--  test/CodeGen/X86/2010-11-02-DbgParameter.ll | 35
-rw-r--r--  test/CodeGen/X86/2010-11-09-MOVLPS.ll | 66
-rw-r--r--  test/CodeGen/X86/2010-11-18-SelectOfExtload.ll | 15
-rw-r--r--  test/CodeGen/X86/2010-12-02-MC-Set.ll | 22
-rw-r--r--  test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll | 19
-rw-r--r--  test/CodeGen/X86/2011-01-10-DagCombineHang.ll | 15
-rw-r--r--  test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll | 103
-rw-r--r--  test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll | 14
-rw-r--r--  test/CodeGen/X86/3addr-or.ll | 38
-rw-r--r--  test/CodeGen/X86/abi-isel.ll | 942
-rw-r--r--  test/CodeGen/X86/add-of-carry.ll | 34
-rw-r--r--  test/CodeGen/X86/add.ll | 40
-rw-r--r--  test/CodeGen/X86/addr-label-difference.ll | 2
-rw-r--r--  test/CodeGen/X86/alldiv-divdi3.ll | 17
-rw-r--r--  test/CodeGen/X86/andimm8.ll | 19
-rw-r--r--  test/CodeGen/X86/apm.ll | 26
-rw-r--r--  test/CodeGen/X86/atomic_op.ll | 71
-rw-r--r--  test/CodeGen/X86/avx-128.ll | 2
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86.ll | 33
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86_64.ll | 2
-rw-r--r--  test/CodeGen/X86/bc-extract.ll | 27
-rw-r--r--  test/CodeGen/X86/bit-test-shift.ll | 13
-rw-r--r--  test/CodeGen/X86/bswap-inline-asm.ll | 11
-rw-r--r--  test/CodeGen/X86/byval.ll | 11
-rw-r--r--  test/CodeGen/X86/cmp-test.ll | 27
-rw-r--r--  test/CodeGen/X86/cmp.ll | 92
-rw-r--r--  test/CodeGen/X86/cmp0.ll | 24
-rw-r--r--  test/CodeGen/X86/cmp2.ll | 18
-rw-r--r--  test/CodeGen/X86/commute-two-addr.ll | 48
-rw-r--r--  test/CodeGen/X86/compare-inf.ll | 16
-rw-r--r--  test/CodeGen/X86/complex-asm.ll | 17
-rw-r--r--  test/CodeGen/X86/conditional-indecrement.ll | 89
-rw-r--r--  test/CodeGen/X86/const-select.ll | 22
-rw-r--r--  test/CodeGen/X86/crash.ll | 58
-rw-r--r--  test/CodeGen/X86/critical-edge-split-2.ll | 29
-rw-r--r--  test/CodeGen/X86/critical-edge-split.ll | 50
-rw-r--r--  test/CodeGen/X86/ctpop-combine.ll | 40
-rw-r--r--  test/CodeGen/X86/dagcombine-buildvector.ll | 2
-rw-r--r--  test/CodeGen/X86/dbg-live-in-location.ll | 84
-rw-r--r--  test/CodeGen/X86/dbg-merge-loc-entry.ll | 71
-rw-r--r--  test/CodeGen/X86/dbg-value-inlined-parameter.ll | 86
-rw-r--r--  test/CodeGen/X86/dbg-value-location.ll | 70
-rw-r--r--  test/CodeGen/X86/dbg-value-range.ll | 56
-rw-r--r--  test/CodeGen/X86/div_const.ll | 7
-rw-r--r--  test/CodeGen/X86/divide-by-constant.ll | 62
-rw-r--r--  test/CodeGen/X86/dll-linkage.ll | 2
-rw-r--r--  test/CodeGen/X86/dollar-name.ll | 2
-rw-r--r--  test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll | 23
-rw-r--r--  test/CodeGen/X86/fast-isel-bc.ll | 16
-rw-r--r--  test/CodeGen/X86/fast-isel-gep.ll | 17
-rw-r--r--  test/CodeGen/X86/fast-isel-mem.ll | 18
-rw-r--r--  test/CodeGen/X86/fltused.ll | 19
-rw-r--r--  test/CodeGen/X86/fp-in-intregs.ll | 3
-rw-r--r--  test/CodeGen/X86/fp-stack-compare.ll | 3
-rw-r--r--  test/CodeGen/X86/ghc-cc.ll | 4
-rw-r--r--  test/CodeGen/X86/global-sections.ll | 18
-rw-r--r--  test/CodeGen/X86/inline-asm-h.ll | 12
-rw-r--r--  test/CodeGen/X86/inline-asm-ptr-cast.ll | 27
-rw-r--r--  test/CodeGen/X86/insertelement-legalize.ll | 2
-rw-r--r--  test/CodeGen/X86/legalize-sub-zero-2.ll | 41
-rw-r--r--  test/CodeGen/X86/legalize-sub-zero.ll | 35
-rw-r--r--  test/CodeGen/X86/legalizedag_vec.ll | 8
-rw-r--r--  test/CodeGen/X86/licm-symbol.ll | 2
-rw-r--r--  test/CodeGen/X86/loop-blocks.ll | 11
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll | 15
-rw-r--r--  test/CodeGen/X86/machine-cse.ll | 40
-rw-r--r--  test/CodeGen/X86/memcmp.ll | 12
-rw-r--r--  test/CodeGen/X86/memcpy.ll | 64
-rw-r--r--  test/CodeGen/X86/memmove-0.ll | 9
-rw-r--r--  test/CodeGen/X86/memmove-1.ll | 9
-rw-r--r--  test/CodeGen/X86/memmove-2.ll | 9
-rw-r--r--  test/CodeGen/X86/memmove-3.ll | 9
-rw-r--r--  test/CodeGen/X86/memset-2.ll | 26
-rw-r--r--  test/CodeGen/X86/memset64-on-x86-32.ll | 2
-rw-r--r--  test/CodeGen/X86/mingw-alloca.ll | 4
-rw-r--r--  test/CodeGen/X86/misaligned-memset.ll | 15
-rw-r--r--  test/CodeGen/X86/mmx-arg-passing.ll | 19
-rw-r--r--  test/CodeGen/X86/mmx-arg-passing2.ll | 14
-rw-r--r--  test/CodeGen/X86/mmx-arith.ll | 380
-rw-r--r--  test/CodeGen/X86/mmx-bitcast-to-i64.ll | 37
-rw-r--r--  test/CodeGen/X86/mmx-builtins.ll | 1324
-rw-r--r--  test/CodeGen/X86/mmx-insert-element.ll | 10
-rw-r--r--  test/CodeGen/X86/mmx-pinsrw.ll | 2
-rw-r--r--  test/CodeGen/X86/mmx-punpckhdq.ll | 19
-rw-r--r--  test/CodeGen/X86/mmx-shift.ll | 24
-rw-r--r--  test/CodeGen/X86/mmx-shuffle.ll | 6
-rw-r--r--  test/CodeGen/X86/mmx-vzmovl-2.ll | 24
-rw-r--r--  test/CodeGen/X86/mmx-vzmovl.ll | 4
-rw-r--r--  test/CodeGen/X86/movgs.ll | 53
-rw-r--r--  test/CodeGen/X86/mult-alt-generic-i686.ll | 321
-rw-r--r--  test/CodeGen/X86/mult-alt-generic-x86_64.ll | 321
-rw-r--r--  test/CodeGen/X86/mult-alt-x86.ll | 358
-rw-r--r--  test/CodeGen/X86/narrow-shl-load.ll | 83
-rw-r--r--  test/CodeGen/X86/negative-sin.ll | 4
-rw-r--r--  test/CodeGen/X86/non-globl-eh-frame.ll | 24
-rw-r--r--  test/CodeGen/X86/phi-immediate-factoring.ll | 2
-rw-r--r--  test/CodeGen/X86/phys_subreg_coalesce-2.ll | 2
-rw-r--r--  test/CodeGen/X86/pic.ll | 24
-rw-r--r--  test/CodeGen/X86/pic_jumptable.ll | 2
-rw-r--r--  test/CodeGen/X86/popcnt.ll | 38
-rw-r--r--  test/CodeGen/X86/postra-licm.ll | 2
-rw-r--r--  test/CodeGen/X86/pr2659.ll | 7
-rw-r--r--  test/CodeGen/X86/pr3522.ll | 2
-rw-r--r--  test/CodeGen/X86/pr9127.ll | 12
-rw-r--r--  test/CodeGen/X86/prefetch.ll | 10
-rw-r--r--  test/CodeGen/X86/rodata-relocs.ll | 16
-rw-r--r--  test/CodeGen/X86/scalar_widen_div.ll | 2
-rw-r--r--  test/CodeGen/X86/select-aggregate.ll | 15
-rw-r--r--  test/CodeGen/X86/select-zero-one.ll | 25
-rw-r--r--  test/CodeGen/X86/select.ll | 239
-rw-r--r--  test/CodeGen/X86/sext-select.ll | 23
-rw-r--r--  test/CodeGen/X86/shift-folding.ll | 6
-rw-r--r--  test/CodeGen/X86/sibcall-3.ll | 2
-rw-r--r--  test/CodeGen/X86/sibcall-5.ll | 31
-rw-r--r--  test/CodeGen/X86/sibcall.ll | 30
-rw-r--r--  test/CodeGen/X86/sink-hoist.ll | 29
-rw-r--r--  test/CodeGen/X86/split-select.ll | 7
-rw-r--r--  test/CodeGen/X86/sse-align-11.ll | 3
-rw-r--r--  test/CodeGen/X86/sse2.ll | 30
-rw-r--r--  test/CodeGen/X86/sse3.ll | 17
-rw-r--r--  test/CodeGen/X86/sse41.ll | 4
-rw-r--r--  test/CodeGen/X86/stack-align.ll | 17
-rw-r--r--  test/CodeGen/X86/stdcall-notailcall.ll | 13
-rw-r--r--  test/CodeGen/X86/store-narrow.ll | 14
-rw-r--r--  test/CodeGen/X86/store_op_load_fold2.ll | 2
-rw-r--r--  test/CodeGen/X86/switch-bt.ll | 30
-rw-r--r--  test/CodeGen/X86/switch-or.ll | 22
-rw-r--r--  test/CodeGen/X86/tail-opts.ll | 23
-rw-r--r--  test/CodeGen/X86/tailcall-largecode.ll | 8
-rw-r--r--  test/CodeGen/X86/tailcall-ri64.ll | 24
-rw-r--r--  test/CodeGen/X86/tailcall-stackalign.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallfp2.ll | 4
-rw-r--r--  test/CodeGen/X86/tailcallstack64.ll | 17
-rw-r--r--  test/CodeGen/X86/tls-pic.ll | 16
-rw-r--r--  test/CodeGen/X86/tls9.ll | 2
-rw-r--r--  test/CodeGen/X86/tlv-1.ll (renamed from test/CodeGen/X86/tls-1.ll) | 16
-rw-r--r--  test/CodeGen/X86/tlv-2.ll | 32
-rw-r--r--  test/CodeGen/X86/twoaddr-lea.ll | 32
-rw-r--r--  test/CodeGen/X86/uint64-to-float.ll | 21
-rw-r--r--  test/CodeGen/X86/umul-with-overflow.ll | 8
-rw-r--r--  test/CodeGen/X86/umulo-64.ll | 28
-rw-r--r--  test/CodeGen/X86/unaligned-load.ll | 2
-rw-r--r--  test/CodeGen/X86/unknown-location.ll | 10
-rw-r--r--  test/CodeGen/X86/vec-sign.ll | 30
-rw-r--r--  test/CodeGen/X86/vec-trunc-store.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_cast.ll | 1
-rw-r--r--  test/CodeGen/X86/vec_compare-2.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_ext_inreg.ll | 1
-rw-r--r--  test/CodeGen/X86/vec_insert-5.ll | 9
-rw-r--r--  test/CodeGen/X86/vec_insert-7.ll | 15
-rw-r--r--  test/CodeGen/X86/vec_select.ll | 12
-rw-r--r--  test/CodeGen/X86/vec_set-F.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_shuffle-27.ll | 29
-rw-r--r--  test/CodeGen/X86/vec_shuffle-30.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-37.ll | 10
-rw-r--r--  test/CodeGen/X86/vec_zero_cse.ll | 5
-rw-r--r--  test/CodeGen/X86/visibility.ll | 11
-rw-r--r--  test/CodeGen/X86/vshift-1.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-2.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-3.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-4.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-5.ll | 2
-rw-r--r--  test/CodeGen/X86/vsplit-and.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-5.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-6.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-5.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-6.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_extract-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_load-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_load-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_select-1.ll | 12
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll | 2
-rw-r--r--  test/CodeGen/X86/win64_params.ll | 11
-rw-r--r--  test/CodeGen/X86/win64_vararg.ll | 20
-rw-r--r--  test/CodeGen/X86/win_chkstk.ll | 15
-rw-r--r--  test/CodeGen/X86/x86-64-extend-shift.ll | 10
-rw-r--r--  test/CodeGen/X86/x86_64-mul-by-const.ll | 9
-rw-r--r--  test/CodeGen/X86/zext-extract_subreg.ll | 60
231 files changed, 6477 insertions, 1408 deletions
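
Most of the churn in the hunks below comes from two changes visible in the diffs themselves: MMX intrinsic tests are rewritten against the opaque x86_mmx IR type (instead of <8 x i8>, <4 x i16>, <2 x i32>, or <1 x i64> operands), and several 32-bit Linux tests are adjusted for the 16-byte aligned stack work (PR8969). As a minimal, illustrative sketch of the new x86_mmx style, not taken verbatim from any test in this patch (the function name is hypothetical; the intrinsic declaration matches the ones added below):

; Illustrative only: operands are bitcast to the opaque x86_mmx type,
; the intrinsic is declared over x86_mmx, and the result is bitcast back.
define <1 x i64> @paddw_example(<1 x i64> %a, <1 x i64> %b) nounwind {
entry:
  %ma = bitcast <1 x i64> %a to x86_mmx
  %mb = bitcast <1 x i64> %b to x86_mmx
  %sum = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %ma, x86_mmx %mb)
  %r = bitcast x86_mmx %sum to <1 x i64>
  ret <1 x i64> %r
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
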
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index c39b82a..a662dd5 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s
+; There are no MMX instructions here. We use add+adcl for the adds.
define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
entry:
@@ -7,9 +8,8 @@ entry:
bb26: ; preds = %bb26, %entry
-; CHECK: movq ({{.*}},8), %mm
-; CHECK: paddq ({{.*}},8), %mm
-; CHECK: paddq %mm{{[0-7]}}, %mm
+; CHECK: addl %e
+; CHECK: adcl %e
%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
@@ -27,3 +27,38 @@ bb31: ; preds = %bb26, %entry
%sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
ret <1 x i64> %sum.035.1
}
+
+
+; This is the original test converted to use MMX intrinsics.
+
+define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind {
+entry:
+ %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx
+ %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1]
+ br i1 %tmp2942, label %bb31, label %bb26
+
+bb26: ; preds = %bb26, %entry
+
+; CHECK: movq ({{.*}},8), %mm
+; CHECK: paddq ({{.*}},8), %mm
+; CHECK: paddq %mm{{[0-7]}}, %mm
+
+ %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
+ %sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp14 = load x86_mmx* %tmp13 ; <x86_mmx> [#uses=1]
+ %tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp19 = load x86_mmx* %tmp18 ; <x86_mmx> [#uses=1]
+ %tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14) ; <x86_mmx> [#uses=1]
+ %tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0) ; <x86_mmx> [#uses=2]
+ %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2]
+ %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1]
+ br i1 %tmp29, label %bb26, label %bb31
+
+bb31: ; preds = %bb26, %entry
+ %sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %t = bitcast x86_mmx %sum.035.1 to <1 x i64>
+ ret <1 x i64> %t
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 2093b8f..006cf2e 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -5,10 +5,10 @@ target triple = "i686-apple-darwin8"
define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) {
entry:
- %tmp4 = bitcast <1 x i64> %mask1 to <8 x i8> ; <<8 x i8>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %c64 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> %tmp6, <8 x i8> %tmp4, i8* %P )
+ %tmp4 = bitcast <1 x i64> %mask1 to x86_mmx ; <x86_mmx> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %c64 to x86_mmx ; <x86_mmx> [#uses=1]
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, i8* %P )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 6128d8b..660d4fe 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,17 +1,16 @@
; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp2 = bitcast <1 x i64> %A to x86_mmx
+ %tmp3 = bitcast <1 x i64> %B to x86_mmx
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 2c513f1..1c5e676 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -2,19 +2,17 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp4 = bitcast <1 x i64> %B to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %A to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
-
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
deleted file mode 100644
index 3f9845c..0000000
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86
-; ModuleID = 'bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
- %struct.wxPoint2DInt = type { i32, i32 }
-
-define x86_fp80 @_ZNK12wxPoint2DInt14GetVectorAngleEv(%struct.wxPoint2DInt* %this) {
-entry:
- br i1 false, label %cond_true, label %UnifiedReturnBlock
-
-cond_true: ; preds = %entry
- %tmp8 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1]
- %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000 ; <x86_fp80> [#uses=1]
- ret x86_fp80 %retval
-
-UnifiedReturnBlock: ; preds = %entry
- ret x86_fp80 0xK4005B400000000000000
-}
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 7463a0e..bdacf50 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index dc8c097..5089e8c 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -5,15 +5,15 @@ entry:
tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp1 = tail call <2 x i32> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <<2 x i32>> [#uses=1]
+ %tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <x86_mmx> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef ) nounwind ; <i32> [#uses=1]
+ %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind ; <i32> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef, i32 undef, i32 %tmp3 ) nounwind
+ tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> %tmp1 ) nounwind ; <i32> [#uses=0]
+ %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 500cd1f..8665282 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 98919ee..cf04dcf 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 75
; PR2539
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2]
external global <4 x float>, align 1 ; <<4 x float>*>:1 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index c76dd7d..53402c0 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -17,11 +17,13 @@ entry:
br i1 false, label %bb.nph144.split, label %bb133
bb.nph144.split: ; preds = %entry
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> zeroinitializer, i8* null ) nounwind
+ %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx
+ %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, i8* null ) nounwind
unreachable
bb133: ; preds = %entry
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*) nounwind
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 60be0d5..2dc1dea 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,6 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
-; PR2687
+; originally from PR2687, but things don't work that way any more.
+; there are no MMX instructions here; we use XMM.
define <2 x double> @a(<2 x i32> %x) nounwind {
entry:
@@ -13,3 +16,20 @@ entry:
%y = fptosi <2 x double> %x to <2 x i32>
ret <2 x i32> %y
}
+
+; This is how to get MMX instructions.
+
+define <2 x double> @a2(x86_mmx %x) nounwind {
+entry:
+ %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
+ ret <2 x double> %y
+}
+
+define x86_mmx @b2(<2 x double> %x) nounwind {
+entry:
+ %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
+ ret x86_mmx %y
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 3c64fe4..86e50c9 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -15,14 +15,16 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
-@x = common global i32 0 ; <i32*> [#uses=1]
+@x = common global i32 0
define i32 @aci(i32* %pw) nounwind {
entry:
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
- %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind ; <{ i32, i32 }> [#uses=0]
- %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind ; <{ i32, i32 }> [#uses=1]
- %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0 ; <i32> [#uses=1]
- %1 = add i32 %asmresult3, %0 ; <i32> [#uses=1]
- ret i32 %1
+ %0 = load i32* @x, align 4
+ %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+ %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+ %asmresult2 = extractvalue { i32, i32 } %asmtmp, 0
+ %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0
+ %1 = add i32 %asmresult2, %asmresult3
+ %2 = add i32 %0, %1
+ ret i32 %2
}
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index afeb358..9d144a4 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; Now this test spills one register. But a reload in the loop is cheaper than
+; the divsd so it's a win.
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+; CHECK: fourn
entry:
br label %bb
@@ -11,6 +14,11 @@ bb: ; preds = %bb, %entry
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
br i1 %1, label %bb30.loopexit, label %bb
+; CHECK: %bb30.loopexit
+; CHECK: divsd %xmm0
+; CHECK: movsd %xmm0, 16(%esp)
+; CHECK: .align
+; CHECK-NEXT: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 784bc72..3d0766c 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -1,8 +1,8 @@
; Linux doesn't support stack realignment for functions with allocas (PR2888).
; Until it does, we shouldn't use movaps to access the stack. On targets with
; sufficiently aligned stack (e.g. darwin) we should.
-
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
deleted file mode 100644
index 2e114ab..0000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define zeroext i16 @a(i16 zeroext %x) nounwind {
-entry:
- %div = udiv i16 %x, 33 ; <i32> [#uses=1]
- ret i16 %div
-}
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
deleted file mode 100644
index 7c811af..0000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define signext i16 @a(i16 signext %x) nounwind {
-entry:
- %div = sdiv i16 %x, 33 ; <i32> [#uses=1]
- ret i16 %div
-}
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index 9c71469..4feb764 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-legalize-types-checking
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index 8684f4a..8b3094b 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
; CHECK: .section __TEXT,__cstring,cstring_literals
-@x = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
+@x = internal unnamed_addr constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@y = global [1 x i8]* @x
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index bb01e5a..0b5b7bd 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 757042e..dd88235 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
-; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
+; RUN: grep {leaq.*TLSGD} %t2
+; RUN: grep {__tls_get_addr} %t2
; PR4004
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c598228..98b1e0e 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
define x86_fp80 @a(i64 %x) nounwind readnone {
entry:
- %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
- ret x86_fp80 %conv
+ %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %conv
}
-
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 810a6f4..12bd285 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -o %t1
-; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK: subq $40, %rsp
+; CHECK: movaps %xmm8, (%rsp)
+; CHECK: movaps %xmm7, 16(%rsp)
define i32 @a() nounwind {
entry:
- tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
- ret i32 undef
+ tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret i32 undef
}
-
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 336f17e..01852a6 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | not grep movl
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
index eb9378b..3669856 100644
--- a/test/CodeGen/X86/2009-07-07-SplitICmp.ll
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
%D = icmp sgt <2 x i32> %A, %B
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
index b9b09a3..288eef4 100644
--- a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -1,10 +1,12 @@
; RUN: llc < %s -march=x86-64
; PR4669
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
define <1 x i64> @test(i64 %t) {
entry:
%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
- %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
- ret <1 x i64> %t2
+ %t0 = bitcast <1 x i64> %t1 to x86_mmx
+ %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
+ %t3 = bitcast x86_mmx %t2 to <1 x i64>
+ ret <1 x i64> %t3
}
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
index de32c21..f9b5f9e 100644
--- a/test/CodeGen/X86/2009-08-06-inlineasm.ll
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=i386-pc-linux-gnu < %s
+; RUN: false
+; XRUN: llc -mtriple=i386-pc-linux-gnu < %s
; PR4668
; XFAIL: *
; FIXME: If the coalescer happens to coalesce %level.1 with the copy to EAX
; (for ret) then this will fail to compile. The fundamental problem is
; once the coalescer fixes a virtual register to physical register we can't
-; evict it.
+; evict it. This started passing again due to the changes for PR8969
+; so I've disabled it with a bigger stick.
define i32 @x(i32 %qscale) nounwind {
entry:
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index f9ca861..adac203 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -2,9 +2,9 @@
; This test shouldn't require spills.
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; CHECK-NOT: $rsp
-; CHECK: addq $8, %rsp
+; CHECK: popq
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index f7ba661..823e0ca 100644
--- a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -21,7 +21,7 @@ define void @leaf() nounwind {
; CHECK: leaf:
; CHECK-NOT: -8(%rsp)
; CHECK: leaq link_ptr@TLSGD
-; CHECK: call __tls_get_addr@PLT
+; CHECK: callq __tls_get_addr@PLT
"file foo2.c, line 14, bb1":
%p = alloca %test*, align 8 ; <%test**> [#uses=4]
br label %"file foo2.c, line 14, bb2"
diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
index 76cc1a4..42f19b3 100644
--- a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
@@ -2,33 +2,27 @@
; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 4cd3be3..fa3d5fb 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
+; There are no MMX operations here, so we use XMM or i64.
define void @ti8(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <8 x i8>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <8 x i8>
-; CHECK: movdq2q
%tmp3 = add <8 x i8> %tmp1, %tmp2
+; CHECK: paddb %xmm1, %xmm0
store <8 x i8> %tmp3, <8 x i8>* null
ret void
}
@@ -14,10 +14,9 @@ entry:
define void @ti16(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <4 x i16>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <4 x i16>
-; CHECK: movdq2q
%tmp3 = add <4 x i16> %tmp1, %tmp2
+; CHECK: paddw %xmm1, %xmm0
store <4 x i16> %tmp3, <4 x i16>* null
ret void
}
@@ -25,10 +24,9 @@ entry:
define void @ti32(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <2 x i32>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <2 x i32>
-; CHECK: movdq2q
%tmp3 = add <2 x i32> %tmp1, %tmp2
+; CHECK: paddd %xmm1, %xmm0
store <2 x i32> %tmp3, <2 x i32>* null
ret void
}
@@ -36,10 +34,60 @@ entry:
define void @ti64(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <1 x i64>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <1 x i64>
-; CHECK: movdq2q
%tmp3 = add <1 x i64> %tmp1, %tmp2
+; CHECK: addq %rax, %rcx
store <1 x i64> %tmp3, <1 x i64>* null
ret void
}
+
+; MMX intrinsic calls get us MMX instructions.
+
+define void @ti8a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti16a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti32a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti64a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index e20f1d8..3738f80 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -11,7 +11,7 @@ target triple = "i386-apple-darwin10.0.0"
; Verify that %esi gets spilled before the call.
; CHECK: Z4test1SiS
; CHECK: movl %esi,{{.*}}(%ebp)
-; CHECK: call __Z6throwsv
+; CHECK: calll __Z6throwsv
define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp {
entry:
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index d211549..f9bda7f 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
-; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
+; Test to check .debug_loc support. This test case emits many debug_loc entries.
+
+; CHECK: Loc expr size
+; CHECK-NEXT: DW_OP_reg
%0 = type { double }
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 13f72a9..60171eb 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -61,6 +61,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 85
; CHECK-NEXT: .quad Ltmp3
-; CHECK-NEXT: .quad Lfunc_end
+; CHECK-NEXT: .quad Ltmp6
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 83
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 80643d0..ad8546e 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -39,6 +39,6 @@ entry:
!13 = metadata !{i32 7, i32 0, metadata !14, null}
!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
-;CHECK: DEBUG_VALUE: bar:x <- EBX+0
-;CHECK-NEXT:Ltmp
-;CHECK-NEXT: DEBUG_VALUE: foo:y <- 1+0
+;CHECK: DEBUG_VALUE: bar:x <- E
+;CHECK: Ltmp
+;CHECK: DEBUG_VALUE: foo:y <- 1+0
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index c6421a2..6db3ce1 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
; <rdar://problem/8124405>
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
@@ -21,9 +21,9 @@ bb:
; statement. It can be an ADD or LEA instruction, it's not important which one
; it is.
;
-; CHECK: ## %bb
-; CHECK-NEXT: addq $64036, %rdi
-; CHECK: rep;stosl
+; CHECK: # %bb
+; CHECK: addq $64036, %rdi
+; CHECK: rep;stosl
%tmp5 = bitcast i32* %tmp4 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
diff --git a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
index cb47d20..0bbb24f 100644
--- a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
+++ b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
@@ -3,7 +3,7 @@
define void @foo() nounwind ssp {
entry:
; CHECK: foo
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; CHECK: int $3
call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
@@ -18,7 +18,7 @@ return: ; preds = %entry
define void @bar() nounwind ssp {
entry:
; CHECK: bar
-; CHECK-NOT: subq $8, %rsp
+; CHECK-NOT: pushq
; CHECK: int $3
call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
new file mode 100644
index 0000000..bed8c8a
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin10 < %s - | FileCheck %s
+; Radar 8286101
+; CHECK: .file 2 "<stdin>"
+
+define i32 @foo() nounwind ssp {
+entry:
+ ret i32 42, !dbg !8
+}
+
+define i32 @bar() nounwind ssp {
+entry:
+ ret i32 21, !dbg !10
+}
+
+!llvm.dbg.sp = !{!0, !6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 53, i32 13, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 4, i32 13, metadata !11, null}
+!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
new file mode 100644
index 0000000..9bbd691
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd8.1 -o /dev/null
+; This formerly crashed, PR 8154.
+
+module asm ".weak sem_close"
+module asm ".equ sem_close, _sem_close"
+module asm ".weak sem_destroy"
+module asm ".equ sem_destroy, _sem_destroy"
+module asm ".weak sem_getvalue"
+module asm ".equ sem_getvalue, _sem_getvalue"
+module asm ".weak sem_init"
+module asm ".equ sem_init, _sem_init"
+module asm ".weak sem_open"
+module asm ".equ sem_open, _sem_open"
+module asm ".weak sem_post"
+module asm ".equ sem_post, _sem_post"
+module asm ".weak sem_timedwait"
+module asm ".equ sem_timedwait, _sem_timedwait"
+module asm ".weak sem_trywait"
+module asm ".equ sem_trywait, _sem_trywait"
+module asm ".weak sem_unlink"
+module asm ".equ sem_unlink, _sem_unlink"
+module asm ".weak sem_wait"
+module asm ".equ sem_wait, _sem_wait"
+
+%struct._sem = type { i32, %struct._usem }
+%struct._usem = type { i32, i32, i32 }
+
+define void @_sem_timedwait(%struct._sem* noalias %sem) nounwind ssp {
+entry:
+ br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit
+
+while.cond.preheader: ; preds = %entry
+ %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1
+ br label %while.cond
+
+sem_check_validity.exit: ; preds = %entry
+ ret void
+
+while.cond: ; preds = %while.body, %while.cond.preheader
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgl $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_int", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %tmp4, i32 undef, i32 undef, i32* %tmp4) nounwind, !srcloc !0
+ br i1 undef, label %while.cond, label %return
+
+while.end: ; preds = %while.cond
+ br i1 undef, label %if.end18, label %return
+
+if.end18: ; preds = %while.end
+ unreachable
+
+return: ; preds = %while.end, %while.body
+ ret void
+}
+
+!0 = metadata !{i32 158484}
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
new file mode 100644
index 0000000..8fe0309
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
+entry:
+ %a = alloca [64 x i8]
+ %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
+ %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
+ %d = load i8* %b, align 8
+ %e = load i8* %c, align 8
+ %f = bitcast [64 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
+ store i8 %d, i8* %b, align 8
+ store i8 %e, i8* %c, align 8
+ ret i32 0
+}
+
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK: movb 30(%rsp), %dl
+; CHECK: movb (%rsp), %sil
+; CHECK: movb %sil, (%rsp)
+; CHECK: movb %dl, 30(%rsp)
+; CHECK: callq ___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
new file mode 100644
index 0000000..cae81d0
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -0,0 +1,71 @@
+; RUN: llc -verify-machineinstrs -cgp-critical-edge-splitting=0 -mcpu=i386 < %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+; The bb.i basic block gets split while emitting the schedule because
+; -mcpu=i386 doesn't have CMOV.
+;
+; That causes the PHI to be updated wrong because the jumptable data structure is remembering the original MBB.
+;
+; -cgp-critical-edge-splitting=0 prevents the edge to PHI from being split.
+
+@.str146 = external constant [4 x i8], align 1
+@.str706 = external constant [4 x i8], align 1
+@.str1189 = external constant [5 x i8], align 1
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+declare i32 @strlen(i8* nocapture) nounwind readonly
+
+define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 {
+bb.i:
+ %0 = icmp eq i8 undef, 0
+ %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0)
+ %1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly
+ switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [
+ i32 3, label %bb1.i
+ i32 4, label %bb1.i1237
+ i32 5, label %bb1.i1266
+ i32 6, label %bb1.i1275
+ i32 2, label %bb1.i1434
+ i32 8, label %bb1.i1523
+ i32 7, label %bb1.i1537
+ ]
+
+bb1.i: ; preds = %bb.i
+ unreachable
+
+bb1.i1237: ; preds = %bb.i
+ br i1 undef, label %bb.i1820, label %bb1.i1241
+
+bb1.i1241: ; preds = %bb1.i1237
+ unreachable
+
+bb1.i1266: ; preds = %bb.i
+ unreachable
+
+bb1.i1275: ; preds = %bb.i
+ unreachable
+
+bb1.i1434: ; preds = %bb.i
+ unreachable
+
+bb1.i1523: ; preds = %bb.i
+ unreachable
+
+bb1.i1537: ; preds = %bb.i
+ unreachable
+
+bb.i1820: ; preds = %bb1.i1237
+ br label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+
+_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i1820, %bb.i
+ %PatchedName.0.0 = phi i8* [ undef, %bb.i1820 ], [ %Name.0, %bb.i ]
+ br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
+ unreachable
+
+bb141: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ unreachable
+}
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
new file mode 100644
index 0000000..40e7f01
--- /dev/null
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; PR8297
+;
+; On i386, i64 cmpxchg is lowered during legalize types to extract the
+; 64-bit result into a pair of fixed regs. So creation of the DAG node
+; happens in a different place. See
+; X86TargetLowering::ReplaceNodeResults, case ATOMIC_CMP_SWAP.
+;
+; Neither Atomic-xx.ll nor atomic_op.ll covers this. Those tests were
+; autogenerated from C source before 64-bit variants were supported.
+;
+; Note that this case requires a loop around the cmpxchg to force
+; MachineLICM to query alias analysis, exposing a bad
+; MachineMemOperand.
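+;
+; (cmpxchg8b compares edx:eax against the 64-bit memory operand and, on a
+; match, stores ecx:ebx into it, which is why the i64 halves end up pinned to
+; those fixed register pairs.)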
+define void @foo(i64* %ptr) nounwind inlinehint {
+entry:
+ br label %loop
+loop:
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+ %r = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %ptr, i64 0, i64 1)
+ %stored1 = icmp eq i64 %r, 0
+ br i1 %stored1, label %loop, label %continue
+continue:
+ ret void
+}
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
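+
+; In later LLVM releases the same operation would be written with the
+; first-class cmpxchg instruction rather than this intrinsic, roughly:
+;   %pair = cmpxchg i64* %ptr, i64 0, i64 1 seq_cst seq_cst
+;   %r = extractvalue { i64, i1 } %pair, 0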
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
new file mode 100644
index 0000000..79c0cf3
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -0,0 +1,35 @@
+; RUN: llc -O2 -asm-verbose < %s | FileCheck %s
+; Radar 8616981
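+; The parameter %i is otherwise unused; the llvm.dbg.value call alone has to
+; be enough to produce the DW_TAG_formal_parameter entry checked below.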
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+%struct.bar = type { i32, i32 }
+
+define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
+; CHECK: TAG_formal_parameter
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12
+ ret i32 1, !dbg !13
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10, metadata !11}
+!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!12 = metadata !{i32 3, i32 47, metadata !0, null}
+!13 = metadata !{i32 4, i32 2, metadata !14, null}
+!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
new file mode 100644
index 0000000..2368f3f
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 -O0
+; PR8211
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22"
+
+%0 = type { %"int[]" }
+%float = type float
+%"float[]" = type [4 x float]
+%int = type i32
+%"int[]" = type [4 x i32]
+%"long unsigned int" = type i64
+
+define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind {
+entry:
+ %a_addr = alloca i8*
+ %b_addr = alloca %0*
+ %c_addr = alloca %0*
+ %"alloca point" = bitcast i32 0 to i32
+ store i8* %a, i8** %a_addr
+ store %0* %b, %0** %b_addr
+ store %0* %c, %0** %c_addr
+ %0 = load i8** %a_addr, align 64
+ %1 = load %0** %b_addr, align 64
+ %2 = load %0** %c_addr, align 64
+ %"ssa point" = bitcast i32 0 to i32
+ br label %"2"
+
+"2": ; preds = %entry
+ %3 = bitcast i8* %0 to <2 x i32>*
+ %4 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %5 = bitcast %"int[]"* %4 to <4 x float>*
+ %6 = load <4 x float>* %5, align 16
+ %7 = bitcast <2 x i32>* %3 to <2 x float>*
+ %8 = bitcast <2 x float>* %7 to double*
+ %9 = load double* %8
+ %10 = insertelement <2 x double> undef, double %9, i32 0
+ %11 = insertelement <2 x double> %10, double undef, i32 1
+ %12 = bitcast <2 x double> %11 to <4 x float>
+ %13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %14 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %15 = bitcast %"int[]"* %14 to <4 x float>*
+ store <4 x float> %13, <4 x float>* %15, align 16
+ %16 = bitcast i8* %0 to <2 x i32>*
+ %17 = bitcast <2 x i32>* %16 to i8*
+ %18 = getelementptr i8* %17, i64 8
+ %19 = bitcast i8* %18 to <2 x i32>*
+ %20 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %21 = bitcast %"int[]"* %20 to <4 x float>*
+ %22 = load <4 x float>* %21, align 16
+ %23 = bitcast <2 x i32>* %19 to <2 x float>*
+ %24 = bitcast <2 x float>* %23 to double*
+ %25 = load double* %24
+ %26 = insertelement <2 x double> undef, double %25, i32 0
+ %27 = insertelement <2 x double> %26, double undef, i32 1
+ %28 = bitcast <2 x double> %27 to <4 x float>
+ %29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %30 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %31 = bitcast %"int[]"* %30 to <4 x float>*
+ store <4 x float> %29, <4 x float>* %31, align 16
+ br label %return
+
+return: ; preds = %"2"
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
new file mode 100644
index 0000000..a1074b6
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; Both values were being zero extended; the load of @s must be sign-extended
+; (movsbl) while the load of @u stays zero-extended (movzbl).
+@u = external global i8
+@s = external global i8
+define i32 @foo(i1 %cond) {
+; CHECK: @foo
+ %u_base = load i8* @u
+ %u_val = zext i8 %u_base to i32
+; CHECK: movzbl
+; CHECK: movsbl
+ %s_base = load i8* @s
+ %s_val = sext i8 %s_base to i32
+ %val = select i1 %cond, i32 %u_val, i32 %s_val
+ ret i32 %val
+}
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
new file mode 100644
index 0000000..3144678
--- /dev/null
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -disable-dot-loc -mtriple=x86_64-apple-darwin -O0 | FileCheck %s
+
+
+define void @foo() nounwind ssp {
+entry:
+ ret void, !dbg !5
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 5, i32 1, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+
+; CHECK: .subsections_via_symbols
+; CHECK-NEXT: __debug_line
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
new file mode 100644
index 0000000..b9cf65b
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -enable-legalize-types-checking
+; PR8582
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+ %i17 = icmp eq <4 x i8> undef, zeroinitializer
+ %cond = extractelement <4 x i1> %i17, i32 0
+ %_comp = select i1 %cond, i8 0, i8 undef
+ %merge = insertelement <4 x i8> undef, i8 %_comp, i32 0
+ %cond3 = extractelement <4 x i1> %i17, i32 1
+ %_comp4 = select i1 %cond3, i8 0, i8 undef
+ %merge5 = insertelement <4 x i8> %merge, i8 %_comp4, i32 1
+ %cond8 = extractelement <4 x i1> %i17, i32 2
+ %_comp9 = select i1 %cond8, i8 0, i8 undef
+ %m387 = insertelement <4 x i8> %merge5, i8 %_comp9, i32 2
+ store <4 x i8> %m387, <4 x i8>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-01-10-DagCombineHang.ll b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
new file mode 100644
index 0000000..bf438b8
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; This formerly sent DAGCombiner into an infinite loop; PR 8916.
+
+define i32 @foo(i64 %x, i64 %y, i64 %z, i32 %a, i32 %b) {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %t1 = shl i64 %x, 15
+ %t2 = and i64 %t1, 4294934528
+ %t3 = or i64 %t2, %y
+ %t4 = xor i64 %z, %t3
+ %t5 = trunc i64 %t4 to i32
+ %t6 = add i32 %a, %t5
+ %t7 = add i32 %t6, %b
+ ret i32 %t7
+}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
new file mode 100644
index 0000000..973975b
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Check debug info for variable z_s
+;CHECK: .ascii "z_s" ## DW_AT_name
+;CHECK-NEXT: .byte 0
+;CHECK-NEXT: ## DW_AT_decl_file
+;CHECK-NEXT: ## DW_AT_decl_line
+;CHECK-NEXT: ## DW_AT_type
+;CHECK-NEXT: ## DW_AT_location
+
+
+@.str1 = private unnamed_addr constant [14 x i8] c"m=%u, z_s=%d\0A\00"
+@str = internal constant [21 x i8] c"Failing test vector:\00"
+
+define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !10), !dbg !18
+ tail call void @llvm.dbg.value(metadata !{i64 %b}, i64 0, metadata !11), !dbg !19
+ br label %while.body, !dbg !20
+
+while.body: ; preds = %while.body, %entry
+ %b.addr.0 = phi i64 [ %b, %entry ], [ %rem, %while.body ]
+ %a.addr.0 = phi i64 [ %a, %entry ], [ %b.addr.0, %while.body ]
+ %rem = srem i64 %a.addr.0, %b.addr.0, !dbg !21
+ %cmp = icmp eq i64 %rem, 0, !dbg !23
+ br i1 %cmp, label %if.then, label %while.body, !dbg !23
+
+if.then: ; preds = %while.body
+ tail call void @llvm.dbg.value(metadata !{i64 %rem}, i64 0, metadata !12), !dbg !21
+ ret i64 %b.addr.0, !dbg !23
+}
+
+define i32 @main() nounwind optsize ssp {
+entry:
+ %call = tail call i32 @rand() nounwind optsize, !dbg !24
+ tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !14), !dbg !24
+ %cmp = icmp ugt i32 %call, 21, !dbg !25
+ br i1 %cmp, label %cond.true, label %cond.end, !dbg !25
+
+cond.true: ; preds = %entry
+ %call1 = tail call i32 @rand() nounwind optsize, !dbg !25
+ br label %cond.end, !dbg !25
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25
+ tail call void @llvm.dbg.value(metadata !{i32 %cond}, i64 0, metadata !17), !dbg !25
+ %conv = sext i32 %cond to i64, !dbg !26
+ %conv5 = zext i32 %call to i64, !dbg !26
+ %call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26
+ %cmp7 = icmp eq i64 %call6, 0
+ br i1 %cmp7, label %return, label %if.then, !dbg !26
+
+if.then: ; preds = %cond.end
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8]* @str, i64 0, i64 0))
+ %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
+ ret i32 1, !dbg !27
+
+return: ; preds = %cond.end
+ ret i32 0, !dbg !27
+}
+
+declare i32 @rand() optsize
+
+declare i32 @printf(i8* nocapture, ...) nounwind optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.gcd = !{!10, !11, !12}
+!llvm.dbg.lv.main = !{!14, !17}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 5, i32 41, metadata !0, null}
+!19 = metadata !{i32 5, i32 49, metadata !0, null}
+!20 = metadata !{i32 7, i32 5, metadata !13, null}
+!21 = metadata !{i32 8, i32 9, metadata !22, null}
+!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 9, metadata !22, null}
+!24 = metadata !{i32 26, i32 38, metadata !15, null}
+!25 = metadata !{i32 27, i32 38, metadata !15, null}
+!26 = metadata !{i32 28, i32 9, metadata !15, null}
+!27 = metadata !{i32 30, i32 1, metadata !15, null}
diff --git a/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
new file mode 100644
index 0000000..cedd6a2
--- /dev/null
+++ b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @foo()
+
+define i32 @bar() nounwind {
+; CHECK: bar
+; CHECK-NOT: pop.*ax
+ %call = call i32 @foo()
+ ret i32 %call
+}
+
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 30a1f36..912bdc2 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; rdar://7527734
-define i32 @test(i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %x) nounwind readnone ssp {
entry:
-; CHECK: test:
+; CHECK: test1:
; CHECK: leal 3(%rdi), %eax
%0 = shl i32 %x, 5 ; <i32> [#uses=1]
%1 = or i32 %0, 3 ; <i32> [#uses=1]
@@ -25,3 +25,37 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
%H = or i64 %G, %E ; <i64> [#uses=1]
ret i64 %H
}
+
+;; Test that OR is only emitted as LEA, not as ADD.
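+;; When the two operands have no bits in common the OR computes the same value
+;; as an ADD, so a two-address OR whose source is still live can be turned
+;; into a three-address LEA instead of inserting an extra copy.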
+
+define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
+entry:
+; No reason to emit an add here, should be an or.
+; CHECK: test3:
+; CHECK: orl $3, %edi
+ %0 = shl i32 %x, 5
+ %1 = or i32 %0, 3
+ store i32 %1, i32* %P
+ ret void
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ ret i32 %or
+; CHECK: test4:
+; CHECK: leal (%rsi,%rdi), %eax
+}
+
+define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ store i32 %or, i32* %P, align 4
+ ret void
+; CHECK: test5:
+; CHECK: orl
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 23042b6..5b4d79f 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]
@@ -72,7 +72,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo00:
-; DARWIN-32-PIC: call L0$pb
+; DARWIN-32-PIC: calll L0$pb
; DARWIN-32-PIC-NEXT: L0$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L0$pb(%eax), %ecx
@@ -144,7 +144,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo00:
-; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC: calll L1$pb
; DARWIN-32-PIC-NEXT: L1$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
@@ -208,7 +208,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo01:
-; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC: calll L2$pb
; DARWIN-32-PIC-NEXT: L2$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
@@ -268,7 +268,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo01:
-; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC: calll L3$pb
; DARWIN-32-PIC-NEXT: L3$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
@@ -342,7 +342,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo02:
-; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC: calll L4$pb
; DARWIN-32-PIC-NEXT: L4$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L4$pb(%eax), %ecx
@@ -424,7 +424,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo02:
-; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC: calll L5$pb
; DARWIN-32-PIC-NEXT: L5$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
@@ -497,7 +497,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo03:
-; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC: calll L6$pb
; DARWIN-32-PIC-NEXT: L6$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L6$pb(%eax), %ecx
@@ -551,7 +551,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo04:
-; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC: calll L7$pb
; DARWIN-32-PIC-NEXT: L7$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L7$pb(%eax), %ecx
@@ -619,7 +619,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo05:
-; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC: calll L8$pb
; DARWIN-32-PIC-NEXT: L8$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L8$pb(%eax), %ecx
@@ -682,7 +682,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo06:
-; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC: calll L9$pb
; DARWIN-32-PIC-NEXT: L9$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L9$pb(%eax), %ecx
@@ -735,7 +735,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo07:
-; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC: calll L10$pb
; DARWIN-32-PIC-NEXT: L10$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L10$pb(%eax), %ecx
@@ -801,7 +801,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo08:
-; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC: calll L11$pb
; DARWIN-32-PIC-NEXT: L11$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L11$pb(%eax), %ecx
@@ -868,7 +868,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux00:
-; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC: calll L12$pb
; DARWIN-32-PIC-NEXT: L12$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L12$pb(%eax), %ecx
@@ -939,7 +939,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx00:
-; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC: calll L13$pb
; DARWIN-32-PIC-NEXT: L13$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
@@ -1005,7 +1005,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux01:
-; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC: calll L14$pb
; DARWIN-32-PIC-NEXT: L14$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
@@ -1071,7 +1071,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx01:
-; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC: calll L15$pb
; DARWIN-32-PIC-NEXT: L15$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
@@ -1150,7 +1150,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux02:
-; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC: calll L16$pb
; DARWIN-32-PIC-NEXT: L16$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L16$pb(%eax), %ecx
@@ -1233,7 +1233,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx02:
-; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC: calll L17$pb
; DARWIN-32-PIC-NEXT: L17$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
@@ -1306,7 +1306,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux03:
-; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC: calll L18$pb
; DARWIN-32-PIC-NEXT: L18$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64(%eax), %ecx
@@ -1361,7 +1361,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux04:
-; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC: calll L19$pb
; DARWIN-32-PIC-NEXT: L19$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64(%eax), %ecx
@@ -1430,7 +1430,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux05:
-; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC: calll L20$pb
; DARWIN-32-PIC-NEXT: L20$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64(%eax), %ecx
@@ -1493,7 +1493,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux06:
-; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC: calll L21$pb
; DARWIN-32-PIC-NEXT: L21$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64(%eax), %ecx
@@ -1546,7 +1546,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux07:
-; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC: calll L22$pb
; DARWIN-32-PIC-NEXT: L22$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64(%eax), %ecx
@@ -1613,7 +1613,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux08:
-; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC: calll L23$pb
; DARWIN-32-PIC-NEXT: L23$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64(%eax), %ecx
@@ -1686,7 +1686,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind00:
-; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC: calll L24$pb
; DARWIN-32-PIC-NEXT: L24$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1764,7 +1764,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd00:
-; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC: calll L25$pb
; DARWIN-32-PIC-NEXT: L25$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1840,7 +1840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind01:
-; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC: calll L26$pb
; DARWIN-32-PIC-NEXT: L26$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1916,7 +1916,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd01:
-; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC: calll L27$pb
; DARWIN-32-PIC-NEXT: L27$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2001,7 +2001,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind02:
-; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC: calll L28$pb
; DARWIN-32-PIC-NEXT: L28$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2090,7 +2090,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd02:
-; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC: calll L29$pb
; DARWIN-32-PIC-NEXT: L29$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2170,7 +2170,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind03:
-; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC: calll L30$pb
; DARWIN-32-PIC-NEXT: L30$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2242,7 +2242,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind04:
-; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC: calll L31$pb
; DARWIN-32-PIC-NEXT: L31$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2320,7 +2320,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind05:
-; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC: calll L32$pb
; DARWIN-32-PIC-NEXT: L32$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2395,7 +2395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind06:
-; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC: calll L33$pb
; DARWIN-32-PIC-NEXT: L33$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2466,7 +2466,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind07:
-; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC: calll L34$pb
; DARWIN-32-PIC-NEXT: L34$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2543,7 +2543,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind08:
-; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC: calll L35$pb
; DARWIN-32-PIC-NEXT: L35$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2621,7 +2621,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off00:
-; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC: calll L36$pb
; DARWIN-32-PIC-NEXT: L36$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2700,7 +2700,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf00:
-; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC: calll L37$pb
; DARWIN-32-PIC-NEXT: L37$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2777,7 +2777,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off01:
-; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC: calll L38$pb
; DARWIN-32-PIC-NEXT: L38$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2854,7 +2854,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf01:
-; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC: calll L39$pb
; DARWIN-32-PIC-NEXT: L39$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2940,7 +2940,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off02:
-; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC: calll L40$pb
; DARWIN-32-PIC-NEXT: L40$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3030,7 +3030,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf02:
-; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC: calll L41$pb
; DARWIN-32-PIC-NEXT: L41$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3111,7 +3111,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off03:
-; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC: calll L42$pb
; DARWIN-32-PIC-NEXT: L42$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3184,7 +3184,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off04:
-; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC: calll L43$pb
; DARWIN-32-PIC-NEXT: L43$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3263,7 +3263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off05:
-; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC: calll L44$pb
; DARWIN-32-PIC-NEXT: L44$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3339,7 +3339,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off06:
-; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC: calll L45$pb
; DARWIN-32-PIC-NEXT: L45$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3411,7 +3411,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off07:
-; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC: calll L46$pb
; DARWIN-32-PIC-NEXT: L46$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3489,7 +3489,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off08:
-; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC: calll L47$pb
; DARWIN-32-PIC-NEXT: L47$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3560,7 +3560,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo00:
-; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC: calll L48$pb
; DARWIN-32-PIC-NEXT: L48$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L48$pb(%eax), %ecx
@@ -3626,7 +3626,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo01:
-; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC: calll L49$pb
; DARWIN-32-PIC-NEXT: L49$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl $262144, %ecx
@@ -3705,7 +3705,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo02:
-; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC: calll L50$pb
; DARWIN-32-PIC-NEXT: L50$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L50$pb(%eax), %ecx
@@ -3778,7 +3778,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo03:
-; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC: calll L51$pb
; DARWIN-32-PIC-NEXT: L51$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144(%eax), %ecx
@@ -3833,7 +3833,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo04:
-; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC: calll L52$pb
; DARWIN-32-PIC-NEXT: L52$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144(%eax), %ecx
@@ -3902,7 +3902,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo05:
-; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC: calll L53$pb
; DARWIN-32-PIC-NEXT: L53$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144(%eax), %ecx
@@ -3965,7 +3965,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo06:
-; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC: calll L54$pb
; DARWIN-32-PIC-NEXT: L54$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144(%eax), %ecx
@@ -4018,7 +4018,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo07:
-; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC: calll L55$pb
; DARWIN-32-PIC-NEXT: L55$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144(%eax), %ecx
@@ -4085,7 +4085,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo08:
-; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC: calll L56$pb
; DARWIN-32-PIC-NEXT: L56$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144(%eax), %ecx
@@ -4159,7 +4159,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big00:
-; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC: calll L57$pb
; DARWIN-32-PIC-NEXT: L57$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4236,7 +4236,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big01:
-; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC: calll L58$pb
; DARWIN-32-PIC-NEXT: L58$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4322,7 +4322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big02:
-; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC: calll L59$pb
; DARWIN-32-PIC-NEXT: L59$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4403,7 +4403,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big03:
-; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC: calll L60$pb
; DARWIN-32-PIC-NEXT: L60$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4476,7 +4476,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big04:
-; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC: calll L61$pb
; DARWIN-32-PIC-NEXT: L61$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4555,7 +4555,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big05:
-; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC: calll L62$pb
; DARWIN-32-PIC-NEXT: L62$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4631,7 +4631,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big06:
-; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC: calll L63$pb
; DARWIN-32-PIC-NEXT: L63$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4703,7 +4703,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big07:
-; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC: calll L64$pb
; DARWIN-32-PIC-NEXT: L64$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4781,7 +4781,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big08:
-; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC: calll L65$pb
; DARWIN-32-PIC-NEXT: L65$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4840,7 +4840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar00:
-; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC: calll L66$pb
; DARWIN-32-PIC-NEXT: L66$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L66$pb(%eax), %eax
@@ -4887,7 +4887,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr00:
-; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC: calll L67$pb
; DARWIN-32-PIC-NEXT: L67$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
@@ -4934,7 +4934,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar01:
-; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC: calll L68$pb
; DARWIN-32-PIC-NEXT: L68$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L68$pb(%eax), %eax
@@ -4981,7 +4981,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr01:
-; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC: calll L69$pb
; DARWIN-32-PIC-NEXT: L69$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
@@ -5028,7 +5028,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar02:
-; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC: calll L70$pb
; DARWIN-32-PIC-NEXT: L70$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
@@ -5075,7 +5075,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar03:
-; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC: calll L71$pb
; DARWIN-32-PIC-NEXT: L71$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L71$pb(%eax), %eax
@@ -5122,7 +5122,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar04:
-; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC: calll L72$pb
; DARWIN-32-PIC-NEXT: L72$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L72$pb(%eax), %eax
@@ -5169,7 +5169,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar05:
-; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC: calll L73$pb
; DARWIN-32-PIC-NEXT: L73$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dptr-L73$pb(%eax), %eax
@@ -5216,7 +5216,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar06:
-; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC: calll L74$pb
; DARWIN-32-PIC-NEXT: L74$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L74$pb(%eax), %eax
@@ -5263,7 +5263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar07:
-; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC: calll L75$pb
; DARWIN-32-PIC-NEXT: L75$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L75$pb(%eax), %eax
@@ -5310,7 +5310,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar08:
-; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC: calll L76$pb
; DARWIN-32-PIC-NEXT: L76$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lptr-L76$pb(%eax), %eax
@@ -5357,7 +5357,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har00:
-; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC: calll L77$pb
; DARWIN-32-PIC-NEXT: L77$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L77$pb(%eax), %eax
@@ -5404,7 +5404,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr00:
-; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC: calll L78$pb
; DARWIN-32-PIC-NEXT: L78$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
@@ -5451,7 +5451,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har01:
-; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC: calll L79$pb
; DARWIN-32-PIC-NEXT: L79$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L79$pb(%eax), %eax
@@ -5498,7 +5498,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr01:
-; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC: calll L80$pb
; DARWIN-32-PIC-NEXT: L80$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
@@ -5549,7 +5549,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har02:
-; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC: calll L81$pb
; DARWIN-32-PIC-NEXT: L81$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
@@ -5600,7 +5600,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har03:
-; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC: calll L82$pb
; DARWIN-32-PIC-NEXT: L82$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L82$pb(%eax), %eax
@@ -5647,7 +5647,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har04:
-; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC: calll L83$pb
; DARWIN-32-PIC-NEXT: L83$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L83$pb(%eax), %eax
@@ -5697,7 +5697,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har05:
-; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC: calll L84$pb
; DARWIN-32-PIC-NEXT: L84$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L84$pb(%eax), %eax
@@ -5744,7 +5744,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har06:
-; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC: calll L85$pb
; DARWIN-32-PIC-NEXT: L85$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L85$pb(%eax), %eax
@@ -5791,7 +5791,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har07:
-; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC: calll L86$pb
; DARWIN-32-PIC-NEXT: L86$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L86$pb(%eax), %eax
@@ -5840,7 +5840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har08:
-; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC: calll L87$pb
; DARWIN-32-PIC-NEXT: L87$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L87$pb(%eax), %eax
@@ -5889,7 +5889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat00:
-; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC: calll L88$pb
; DARWIN-32-PIC-NEXT: L88$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L88$pb(%eax), %eax
@@ -5942,7 +5942,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt00:
-; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC: calll L89$pb
; DARWIN-32-PIC-NEXT: L89$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
@@ -5995,7 +5995,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat01:
-; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC: calll L90$pb
; DARWIN-32-PIC-NEXT: L90$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L90$pb(%eax), %eax
@@ -6048,7 +6048,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt01:
-; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC: calll L91$pb
; DARWIN-32-PIC-NEXT: L91$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
@@ -6110,7 +6110,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat02:
-; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC: calll L92$pb
; DARWIN-32-PIC-NEXT: L92$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
@@ -6166,7 +6166,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat03:
-; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC: calll L93$pb
; DARWIN-32-PIC-NEXT: L93$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64(%eax), %eax
@@ -6214,7 +6214,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat04:
-; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC: calll L94$pb
; DARWIN-32-PIC-NEXT: L94$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64(%eax), %eax
@@ -6271,7 +6271,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat05:
-; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC: calll L95$pb
; DARWIN-32-PIC-NEXT: L95$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L95$pb(%eax), %eax
@@ -6322,7 +6322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat06:
-; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC: calll L96$pb
; DARWIN-32-PIC-NEXT: L96$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64(%eax), %eax
@@ -6369,7 +6369,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat07:
-; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC: calll L97$pb
; DARWIN-32-PIC-NEXT: L97$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64(%eax), %eax
@@ -6425,7 +6425,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat08:
-; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC: calll L98$pb
; DARWIN-32-PIC-NEXT: L98$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L98$pb(%eax), %eax
@@ -6478,7 +6478,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam00:
-; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC: calll L99$pb
; DARWIN-32-PIC-NEXT: L99$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6531,7 +6531,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam01:
-; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC: calll L100$pb
; DARWIN-32-PIC-NEXT: L100$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6584,7 +6584,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxm01:
-; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC: calll L101$pb
; DARWIN-32-PIC-NEXT: L101$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6646,7 +6646,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam02:
-; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC: calll L102$pb
; DARWIN-32-PIC-NEXT: L102$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
@@ -6702,7 +6702,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam03:
-; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC: calll L103$pb
; DARWIN-32-PIC-NEXT: L103$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144(%eax), %eax
@@ -6750,7 +6750,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam04:
-; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC: calll L104$pb
; DARWIN-32-PIC-NEXT: L104$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144(%eax), %eax
@@ -6807,7 +6807,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam05:
-; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC: calll L105$pb
; DARWIN-32-PIC-NEXT: L105$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6858,7 +6858,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam06:
-; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC: calll L106$pb
; DARWIN-32-PIC-NEXT: L106$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144(%eax), %eax
@@ -6905,7 +6905,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam07:
-; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC: calll L107$pb
; DARWIN-32-PIC-NEXT: L107$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144(%eax), %eax
@@ -6961,7 +6961,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam08:
-; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC: calll L108$pb
; DARWIN-32-PIC-NEXT: L108$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -7021,7 +7021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat00:
-; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC: calll L109$pb
; DARWIN-32-PIC-NEXT: L109$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7082,7 +7082,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt00:
-; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC: calll L110$pb
; DARWIN-32-PIC-NEXT: L110$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7143,7 +7143,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat01:
-; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC: calll L111$pb
; DARWIN-32-PIC-NEXT: L111$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7204,7 +7204,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt01:
-; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC: calll L112$pb
; DARWIN-32-PIC-NEXT: L112$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7272,7 +7272,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat02:
-; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC: calll L113$pb
; DARWIN-32-PIC-NEXT: L113$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
@@ -7336,7 +7336,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat03:
-; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC: calll L114$pb
; DARWIN-32-PIC-NEXT: L114$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7395,7 +7395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat04:
-; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC: calll L115$pb
; DARWIN-32-PIC-NEXT: L115$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7461,7 +7461,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat05:
-; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC: calll L116$pb
; DARWIN-32-PIC-NEXT: L116$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7521,7 +7521,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat06:
-; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC: calll L117$pb
; DARWIN-32-PIC-NEXT: L117$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7580,7 +7580,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat07:
-; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC: calll L118$pb
; DARWIN-32-PIC-NEXT: L118$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7645,7 +7645,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat08:
-; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC: calll L119$pb
; DARWIN-32-PIC-NEXT: L119$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7706,7 +7706,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam00:
-; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC: calll L120$pb
; DARWIN-32-PIC-NEXT: L120$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7767,7 +7767,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm00:
-; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC: calll L121$pb
; DARWIN-32-PIC-NEXT: L121$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7828,7 +7828,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam01:
-; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC: calll L122$pb
; DARWIN-32-PIC-NEXT: L122$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7889,7 +7889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm01:
-; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC: calll L123$pb
; DARWIN-32-PIC-NEXT: L123$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7957,7 +7957,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam02:
-; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC: calll L124$pb
; DARWIN-32-PIC-NEXT: L124$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
@@ -8021,7 +8021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam03:
-; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC: calll L125$pb
; DARWIN-32-PIC-NEXT: L125$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8080,7 +8080,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam04:
-; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC: calll L126$pb
; DARWIN-32-PIC-NEXT: L126$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8146,7 +8146,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam05:
-; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC: calll L127$pb
; DARWIN-32-PIC-NEXT: L127$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8206,7 +8206,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam06:
-; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC: calll L128$pb
; DARWIN-32-PIC-NEXT: L128$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8265,7 +8265,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam07:
-; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC: calll L129$pb
; DARWIN-32-PIC-NEXT: L129$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8330,7 +8330,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam08:
-; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC: calll L130$pb
; DARWIN-32-PIC-NEXT: L130$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8375,31 +8375,32 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: lcallee:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcallee:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: lcallee:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
@@ -8407,47 +8408,47 @@ entry:
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _lcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _lcallee:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
@@ -8455,11 +8456,11 @@ entry:
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _lcallee:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
@@ -8467,11 +8468,11 @@ entry:
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _lcallee:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
@@ -8479,7 +8480,7 @@ entry:
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8506,31 +8507,32 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dcallee:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcallee:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dcallee:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
@@ -8538,47 +8540,47 @@ entry:
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dcallee:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
@@ -8586,11 +8588,11 @@ entry:
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dcallee:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
@@ -8598,11 +8600,11 @@ entry:
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dcallee:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
@@ -8610,7 +8612,7 @@ entry:
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8644,7 +8646,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _address:
-; DARWIN-32-PIC: call L133$pb
+; DARWIN-32-PIC: calll L133$pb
; DARWIN-32-PIC-NEXT: L133$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L133$pb(%eax), %eax
@@ -8693,7 +8695,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _laddress:
-; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC: calll L134$pb
; DARWIN-32-PIC-NEXT: L134$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lcallee-L134$pb(%eax), %eax
@@ -8740,7 +8742,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _daddress:
-; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC: calll L135$pb
; DARWIN-32-PIC-NEXT: L135$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dcallee-L135$pb(%eax), %eax
@@ -8770,66 +8772,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: caller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: caller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: caller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq callee@PLT
; LINUX-64-PIC-NEXT: callq callee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _caller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _caller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _caller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _caller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _callee
; DARWIN-64-STATIC-NEXT: callq _callee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _caller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _callee
; DARWIN-64-DYNAMIC-NEXT: callq _callee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _caller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _callee
; DARWIN-64-PIC-NEXT: callq _callee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8844,66 +8847,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq dcallee
; LINUX-64-PIC-NEXT: callq dcallee
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _dcallee
; DARWIN-64-STATIC-NEXT: callq _dcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _dcallee
; DARWIN-64-PIC-NEXT: callq _dcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8918,66 +8922,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: lcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: lcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq lcallee@PLT
; LINUX-64-PIC-NEXT: callq lcallee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _lcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _lcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _lcallee
; DARWIN-64-STATIC-NEXT: callq _lcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _lcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _lcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _lcallee
; DARWIN-64-PIC-NEXT: callq _lcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8990,57 +8995,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: tailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: tailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: tailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq callee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _tailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _tailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _tailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _tailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _callee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _tailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _callee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _tailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _callee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9053,57 +9059,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dtailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dtailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dtailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq dcallee
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dtailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dtailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dtailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dtailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _dcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dtailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dtailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _dcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9116,57 +9123,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: ltailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ltailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: ltailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq lcallee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _ltailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ltailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ltailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ltailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _lcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _ltailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _ltailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _lcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9183,17 +9191,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: icaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: icaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: icaller:
@@ -9206,8 +9215,8 @@ entry:
; DARWIN-32-STATIC: _icaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9215,8 +9224,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9224,12 +9233,12 @@ entry:
; DARWIN-32-PIC: _icaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L142$pb
+; DARWIN-32-PIC-NEXT: calll L142$pb
; DARWIN-32-PIC-NEXT: L142$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9272,17 +9281,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dicaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dicaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dicaller:
@@ -9295,49 +9305,49 @@ entry:
; DARWIN-32-STATIC: _dicaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dicaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dicaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: calll L143$pb
; DARWIN-32-PIC-NEXT: L143$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dicaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dicaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dicaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-PIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9354,71 +9364,72 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: licaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: licaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: licaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _licaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _licaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _licaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: calll L144$pb
; DARWIN-32-PIC-NEXT: L144$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _licaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _licaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _licaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9435,17 +9446,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: itailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: itailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: itailcaller:
@@ -9458,8 +9470,8 @@ entry:
; DARWIN-32-STATIC: _itailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9467,8 +9479,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9476,12 +9488,12 @@ entry:
; DARWIN-32-PIC: _itailcaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: calll L145$pb
; DARWIN-32-PIC-NEXT: L145$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9521,60 +9533,61 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: ditailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ditailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: ditailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rax
; LINUX-64-PIC-NEXT: callq *(%rax)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _ditailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ditailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ditailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: calll L146$pb
; DARWIN-32-PIC-NEXT: L146$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_difunc-L146$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L146$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ditailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _ditailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _ditailcaller:
; DARWIN-64-PIC: callq *_difunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9588,59 +9601,60 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: litailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: litailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: litailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _litailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _litailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _litailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: calll L147$pb
; DARWIN-32-PIC-NEXT: L147$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_lifunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L147$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _litailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _litailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _litailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
new file mode 100644
index 0000000..f924ec8
--- /dev/null
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8449754>
+
+define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test1:
+; CHECK: sbbl %ecx, %ecx
+; CHECK-NOT: addl
+; CHECK: subl %ecx, %eax
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
+ %inc = zext i1 %cmp to i32
+ %z.0 = add i32 %add4, %inc
+ ret i32 %z.0
+}
+
+; Instcombine transforms test1 into test2:
+; CHECK: test2:
+; CHECK: movl
+; CHECK-NEXT: addl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: subl
+; CHECK-NEXT: ret
+define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+ %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
+ %0 = extractvalue { i32, i1 } %uadd, 0
+ %cmp = extractvalue { i32, i1 } %uadd, 1
+ %inc = zext i1 %cmp to i32
+ %z.0 = add i32 %0, %inc
+ ret i32 %z.0
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 3991a68..3ec5358 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -92,3 +92,43 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
; X64: ret
}
+define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ ret {i32, i1} %t
+}
+
+; X64: test7:
+; X64: addl %esi, %eax
+; X64-NEXT: setb %dl
+; X64-NEXT: ret
+
+; PR5443
+define {i64, i1} @test8(i64 %left, i64 %right) nounwind {
+entry:
+ %extleft = zext i64 %left to i65
+ %extright = zext i64 %right to i65
+ %sum = add i65 %extleft, %extright
+ %res.0 = trunc i65 %sum to i64
+ %overflow = and i65 %sum, -18446744073709551616
+ %res.1 = icmp ne i65 %overflow, 0
+ %final0 = insertvalue {i64, i1} undef, i64 %res.0, 0
+ %final1 = insertvalue {i64, i1} %final0, i1 %res.1, 1
+ ret {i64, i1} %final1
+}
+
+; X64: test8:
+; X64: addq
+; X64-NEXT: sbbq
+; X64-NEXT: testb
+
+define i32 @test9(i32 %x, i32 %y) nounwind readnone {
+ %cmp = icmp eq i32 %x, 10
+ %sub = sext i1 %cmp to i32
+ %cond = add i32 %sub, %y
+ ret i32 %cond
+; X64: test9:
+; X64: cmpl $10
+; X64: sete
+; X64: subl
+; X64: ret
+}
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
index be0908a..49abd8a 100644
--- a/test/CodeGen/X86/addr-label-difference.ll
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin10.0"
; This array should go into the __TEXT,__const section, not into the
; __DATA,__const section, because the elements don't need relocations.
-@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
+@test.array = internal unnamed_addr constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
define void @test(i32 %i) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/alldiv-divdi3.ll b/test/CodeGen/X86/alldiv-divdi3.ll
new file mode 100644
index 0000000..86aa1fd
--- /dev/null
+++ b/test/CodeGen/X86/alldiv-divdi3.ll
@@ -0,0 +1,17 @@
+; Test that, for a 64-bit signed div, a libcall to alldiv is made on Windows
+; unless we have libgcc.
+
+; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEDIVDI
+; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEDIVDI
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %conv4 = sext i32 %argc to i64
+ %div = sdiv i64 84, %conv4
+ %conv7 = trunc i64 %div to i32
+ ret i32 %conv7
+}
+
+; CHECK: alldiv
+; USEDIVDI: divdi3
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
new file mode 100644
index 0000000..640237d
--- /dev/null
+++ b/test/CodeGen/X86/andimm8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+
+; PR8365
+; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0]
+
+define i64 @bra(i32 %zed) nounwind {
+ %t1 = zext i32 %zed to i64
+ %t2 = and i64 %t1, 4294967232
+ ret i64 %t2
+}
+
+; CHECK: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+
+define void @foo(i64 %zed, i64* %x) nounwind {
+ %t1 = and i64 %zed, -4
+ %t2 = or i64 %t1, 2
+ store i64 %t2, i64* %x, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
new file mode 100644
index 0000000..d0c64f2
--- /dev/null
+++ b/test/CodeGen/X86/apm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR8573
+
+; CHECK: foo:
+; CHECK: leaq (%rdi), %rax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: monitor
+define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+ ret void
+}
+
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: bar:
+; CHECK: movl %edi, %ecx
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: mwait
+define void @bar(i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+ ret void
+}
+
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 3ef1887..f3ade93 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,16 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t1
-; RUN: grep "lock" %t1 | count 17
-; RUN: grep "xaddl" %t1 | count 4
-; RUN: grep "cmpxchgl" %t1 | count 13
-; RUN: grep "xchgl" %t1 | count 14
-; RUN: grep "cmova" %t1 | count 2
-; RUN: grep "cmovb" %t1 | count 2
-; RUN: grep "cmovg" %t1 | count 2
-; RUN: grep "cmovl" %t1 | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-define void @main(i32 %argc, i8** %argv) {
+define void @func(i32 %argc, i8** %argv) nounwind {
entry:
%argc.addr = alloca i32 ; <i32*> [#uses=1]
%argv.addr = alloca i8** ; <i8***> [#uses=1]
@@ -29,48 +21,105 @@ entry:
store i32 3855, i32* %ort
store i32 3855, i32* %xort
store i32 4, i32* %temp
- %tmp = load i32* %temp ; <i32> [#uses=1]
+ %tmp = load i32* %temp
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp ) ; <i32>:0 [#uses=1]
store i32 %0, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 ) ; <i32>:1 [#uses=1]
store i32 %1, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:2 [#uses=1]
store i32 %2, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:3 [#uses=1]
store i32 %3, i32* %old
+ ; CHECK: andl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 ) ; <i32>:4 [#uses=1]
store i32 %4, i32* %old
+ ; CHECK: orl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 ) ; <i32>:5 [#uses=1]
store i32 %5, i32* %old
+ ; CHECK: xorl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 ) ; <i32>:6 [#uses=1]
store i32 %6, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 ) ; <i32>:7 [#uses=1]
store i32 %7, i32* %old
%neg = sub i32 0, 1 ; <i32> [#uses=1]
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg ) ; <i32>:8 [#uses=1]
store i32 %8, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:9 [#uses=1]
store i32 %9, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 ) ; <i32>:10 [#uses=1]
store i32 %10, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 65535 ) ; <i32>:11 [#uses=1]
store i32 %11, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:12 [#uses=1]
store i32 %12, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:13 [#uses=1]
store i32 %13, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:14 [#uses=1]
store i32 %14, i32* %old
+ ; CHECK: xchgl %{{.*}}, {{.*}}(%esp)
call i32 @llvm.atomic.swap.i32.p0i32( i32* %val2, i32 1976 ) ; <i32>:15 [#uses=1]
store i32 %15, i32* %old
%neg1 = sub i32 0, 10 ; <i32> [#uses=1]
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 %neg1, i32 1 ) ; <i32>:16 [#uses=1]
store i32 %16, i32* %old
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 1976, i32 1 ) ; <i32>:17 [#uses=1]
store i32 %17, i32* %old
ret void
}
+define void @test2(i32 addrspace(256)* nocapture %P) nounwind {
+entry:
+; CHECK: lock
+; CHECK: cmpxchgl %{{.*}}, %gs:(%{{.*}})
+
+ %0 = tail call i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* %P, i32 0, i32 1)
+ ret void
+}
+
+declare i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* nocapture, i32, i32) nounwind
+
declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
index a72160b..2bd3b5d 100644
--- a/test/CodeGen/X86/avx-128.ll
+++ b/test/CodeGen/X86/avx-128.ll
@@ -4,7 +4,7 @@
define void @zero() nounwind ssp {
entry:
- ; CHECK: vpxor
+ ; CHECK: vxorps
; CHECK: vmovaps
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
ret void
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 9de9023..6c32396 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl %eax, %eax
+ ; CHECK: andl $1, %eax
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -230,7 +230,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vptest
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbb
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vucomiss
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vptest
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vtestpd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK: vtestpd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vtestps
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vtestps
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
index b186710..5a466fc 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -17,7 +17,7 @@ declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readn
define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
new file mode 100644
index 0000000..ac972a8
--- /dev/null
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
+
+
+define float @extractFloat1() nounwind {
+entry:
+ ; CHECK: 1065353216
+ %tmp0 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+ %tmp1 = extractelement <2 x float> %tmp0, i32 0
+ ret float %tmp1
+}
+
+define float @extractFloat2() nounwind {
+entry:
+ ; CHECK: pxor %xmm0, %xmm0
+ %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+ %tmp5 = extractelement <2 x float> %tmp4, i32 1
+ ret float %tmp5
+}
+
+define i32 @extractInt2() nounwind {
+entry:
+ ; CHECK: xorl %eax, %eax
+ %tmp4 = bitcast <1 x i64> <i64 256> to <2 x i32>
+ %tmp5 = extractelement <2 x i32> %tmp4, i32 1
+ ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/X86/bit-test-shift.ll b/test/CodeGen/X86/bit-test-shift.ll
new file mode 100644
index 0000000..7497613
--- /dev/null
+++ b/test/CodeGen/X86/bit-test-shift.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8285015>
+
+define i32 @x(i32 %t) nounwind readnone ssp {
+entry:
+; CHECK: shll $23, %eax
+; CHECK: sarl $31, %eax
+; CHECK: andl $-26, %eax
+ %and = and i32 %t, 256
+ %tobool = icmp eq i32 %and, 0
+ %retval.0 = select i1 %tobool, i32 0, i32 -26
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 2b70193..3bb9124 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep APP %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t
+; RUN: not grep InlineAsm %t
; RUN: FileCheck %s < %t
; CHECK: foo:
@@ -65,6 +65,13 @@ define i32 @t32(i32 %x) nounwind {
ret i32 %asmtmp
}
+; CHECK: u32:
+; CHECK: bswapl
+define i32 @u32(i32 %x) nounwind {
+ %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
+ ret i32 %asmtmp
+}
+
; CHECK: s64:
; CHECK: bswapq
define i64 @s64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index af36e1b..ac0bc09 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep {movq 8(%rsp), %rax}
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movl 8(%esp), %edx} %t
-; RUN: grep {movl 4(%esp), %eax} %t
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+
+; X86: movl 4(%esp), %eax
+; X86: movl 8(%esp), %edx
+
+; X86-64: movq 8(%rsp), %rax
%struct.s = type { i64, i64, i64 }
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
deleted file mode 100644
index 898c09b..0000000
--- a/test/CodeGen/X86/cmp-test.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep test | count 1
-
-define i32 @f1(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
- br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
-
-define i32 @f2(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
- %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
- br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
new file mode 100644
index 0000000..ef5e353
--- /dev/null
+++ b/test/CodeGen/X86/cmp.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
+
+define i32 @test1(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test1:
+; CHECK: cmpl $0, (%rsi)
+}
+
+define i32 @test2(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
+ %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test2:
+; CHECK: movl (%rsi), %eax
+; CHECK: shll $3, %eax
+; CHECK: testl %eax, %eax
+}
+
+define i64 @test3(i64 %x) nounwind {
+ %t = icmp eq i64 %x, 0
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test3:
+; CHECK: testq %rdi, %rdi
+; CHECK: sete %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+define i64 @test4(i64 %x) nounwind {
+ %t = icmp slt i64 %x, 1
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test4:
+; CHECK: testq %rdi, %rdi
+; CHECK: setle %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+
+define i32 @test5(double %A) nounwind {
+ entry:
+ %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
+ %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
+ %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
+ br i1 %bothcond, label %bb8, label %bb12
+
+ bb8:; preds = %entry
+ %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ ret i32 %tmp9
+
+ bb12:; preds = %entry
+ ret i32 32
+; CHECK: test5:
+; CHECK: ucomisd LCPI4_0(%rip), %xmm0
+; CHECK: ucomisd LCPI4_1(%rip), %xmm0
+}
+
+declare i32 @foo(...)
+
+define i32 @test6() nounwind align 2 {
+ %A = alloca {i64, i64}, align 8
+ %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1
+ %C = load i64* %B
+ %D = icmp eq i64 %C, 0
+ br i1 %D, label %T, label %F
+T:
+ ret i32 1
+
+F:
+ ret i32 0
+; CHECK: test6:
+; CHECK: cmpq $0, -8(%rsp)
+; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
deleted file mode 100644
index 4878448..0000000
--- a/test/CodeGen/X86/cmp0.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-define i64 @test0(i64 %x) nounwind {
- %t = icmp eq i64 %x, 0
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test0:
-; CHECK: testq %rdi, %rdi
-; CHECK: sete %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
-define i64 @test1(i64 %x) nounwind {
- %t = icmp slt i64 %x, 1
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test1:
-; CHECK: testq %rdi, %rdi
-; CHECK: setle %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
deleted file mode 100644
index 9a8e00c..0000000
--- a/test/CodeGen/X86/cmp2.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
-
-define i32 @test(double %A) nounwind {
- entry:
- %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
- %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
- %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
- br i1 %bothcond, label %bb8, label %bb12
-
- bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
- bb12:; preds = %entry
- ret i32 32
-}
-
-declare i32 @foo(...)
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 56ea26b..89b436e 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,24 +2,62 @@
; insertion of register-register copies.
; Make sure there are only 3 mov's for each testcase
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {\\\<mov\\\>} | count 6
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
-target triple = "i686-pc-linux-gnu"
@G = external global i32 ; <i32*> [#uses=2]
declare void @ext(i32)
-define i32 @add_test(i32 %X, i32 %Y) {
+define i32 @t1(i32 %X, i32 %Y) nounwind {
+; LINUX: t1:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: addl %eax, %ecx
+; LINUX: movl %ecx, G
%Z = add i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* @G
ret i32 %X
}
-define i32 @xor_test(i32 %X, i32 %Y) {
+define i32 @t2(i32 %X, i32 %Y) nounwind {
+; LINUX: t2:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: xorl %eax, %ecx
+; LINUX: movl %ecx, G
%Z = xor i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* @G
ret i32 %X
}
+; rdar://8762995
+%0 = type { i64, i32 }
+
+define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
+entry:
+; DARWIN: t3:
+; DARWIN: shlq $32, %rcx
+; DARWIN-NOT: leaq
+; DARWIN: orq %rcx, %rax
+; DARWIN-NOT: mov
+; DARWIN: shll $16
+ %tmp21 = zext i32 %lb to i64
+ %tmp23 = zext i32 %ub to i64
+ %tmp24 = shl i64 %tmp23, 32
+ %ins26 = or i64 %tmp24, %tmp21
+ %tmp28 = zext i8 %has_lb to i32
+ %tmp33 = zext i8 %has_ub to i32
+ %tmp34 = shl i32 %tmp33, 8
+ %tmp38 = zext i8 %lb_inclusive to i32
+ %tmp39 = shl i32 %tmp38, 16
+ %tmp43 = zext i8 %ub_inclusive to i32
+ %tmp44 = shl i32 %tmp43, 24
+ %ins31 = or i32 %tmp39, %tmp28
+ %ins36 = or i32 %ins31, %tmp34
+ %ins46 = or i32 %ins36, %tmp44
+ %tmp16 = insertvalue %0 undef, i64 %ins26, 0
+ %tmp19 = insertvalue %0 %tmp16, i32 %ins46, 1
+ ret %0 %tmp19
+}
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
index 2be90c9..9aa44a3 100644
--- a/test/CodeGen/X86/compare-inf.ll
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -5,7 +5,7 @@
; CHECK: oeq_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -14,7 +14,7 @@ define float @oeq_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -23,7 +23,7 @@ define double @oeq_inf(double %x, double %y) nounwind readonly {
; CHECK: une_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -32,7 +32,7 @@ define float @une_inff(float %x, float %y) nounwind readonly {
; CHECK: une_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -41,7 +41,7 @@ define double @une_inf(double %x, double %y) nounwind readonly {
; CHECK: oeq_neg_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -50,7 +50,7 @@ define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_neg_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -59,7 +59,7 @@ define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
; CHECK: une_neg_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -68,7 +68,7 @@ define float @une_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: une_neg_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
diff --git a/test/CodeGen/X86/complex-asm.ll b/test/CodeGen/X86/complex-asm.ll
new file mode 100644
index 0000000..49878b9
--- /dev/null
+++ b/test/CodeGen/X86/complex-asm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; This formerly crashed.
+
+%0 = type { i64, i64 }
+
+define %0 @f() nounwind ssp {
+entry:
+ %v = alloca %0, align 8
+ call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
+ %0 = getelementptr inbounds %0* %v, i64 0, i32 0
+ %1 = load i64* %0, align 8
+ %2 = getelementptr inbounds %0* %v, i64 0, i32 1
+ %3 = load i64* %2, align 8
+ %mrv4 = insertvalue %0 undef, i64 %1, 0
+ %mrv5 = insertvalue %0 %mrv4, i64 %3, 1
+ ret %0 %mrv5
+}
diff --git a/test/CodeGen/X86/conditional-indecrement.ll b/test/CodeGen/X86/conditional-indecrement.ll
new file mode 100644
index 0000000..a3a0c39
--- /dev/null
+++ b/test/CodeGen/X86/conditional-indecrement.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i32 @test1(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test1:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
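+; Note (explanatory, not checked): cmpl $1 sets the carry flag exactly when %a
+; is zero, so sbbl $-1 computes %b + 1 - CF, i.e. %b plus zext(%a != 0), folding
+; the conditional increment into a single adc/sbb.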
+
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test2:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test3:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test4:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test5:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
+
+define i32 @test6(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test6:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test7:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test8(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test8:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
deleted file mode 100644
index 665984c..0000000
--- a/test/CodeGen/X86/const-select.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-
-; RUN: llc < %s | grep {LCPI0_0(,%eax,4)}
-define float @f(i32 %x) nounwind readnone {
-entry:
- %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
- %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
- ret float %iftmp.0.0
-}
-
-; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
-define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
-entry:
- %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
- %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
- %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
- %2 = load i8* %1, align 1 ; <i8> [#uses=1]
- ret i8 %2
-}
-
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index a14a48b..2d8e63e 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -141,3 +141,61 @@ entry:
call void asm sideeffect "outb $0, ${1:w}", "{ax},N{dx},~{dirflag},~{fpsr},~{flags}"(i8 %conv4.i, i32 1017) nounwind
unreachable
}
+
+; Crash trying to form conditional increment with fp value.
+; PR8981
+define i32 @test9(double %X) ssp align 2 {
+entry:
+ %0 = fcmp one double %X, 0.000000e+00
+ %cond = select i1 %0, i32 1, i32 2
+ ret i32 %cond
+}
+
+
+; PR8514 - Crash in address matching due to "heroics" turning and-of-shift into
+; shift of and.
+%struct.S0 = type { i8, [2 x i8], i8 }
+
+define void @func_59(i32 %p_63) noreturn nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc44, %entry
+ %p_63.addr.1 = phi i32 [ %p_63, %entry ], [ 0, %for.inc44 ]
+ %l_74.0 = phi i32 [ 0, %entry ], [ %add46, %for.inc44 ]
+ br i1 undef, label %for.inc44, label %bb.nph81
+
+bb.nph81: ; preds = %for.body
+ %tmp98 = add i32 %p_63.addr.1, 0
+ br label %for.body22
+
+for.body22: ; preds = %for.body22, %bb.nph81
+ %l_75.077 = phi i64 [ %ins, %for.body22 ], [ undef, %bb.nph81 ]
+ %tmp110 = trunc i64 %l_75.077 to i32
+ %tmp111 = and i32 %tmp110, 65535
+ %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0
+ store i8 1, i8* %arrayidx32.0, align 4
+ %tmp106 = shl i32 %tmp110, 2
+ %tmp107 = and i32 %tmp106, 262140
+ %scevgep99.sum114 = or i32 %tmp107, 1
+ %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114
+ store i8 0, i8* %arrayidx32.1.1, align 1
+ %ins = or i64 undef, undef
+ br label %for.body22
+
+for.inc44: ; preds = %for.body
+ %add46 = add i32 %l_74.0, 1
+ br label %for.body
+}
+
+; PR9028
+define void @f(i64 %A) nounwind {
+entry:
+ %0 = zext i64 %A to i160
+ %1 = shl i160 %0, 64
+ %2 = zext i160 %1 to i576
+ %3 = zext i96 undef to i576
+ %4 = or i576 %3, %2
+ store i576 %4, i576* undef, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/critical-edge-split-2.ll b/test/CodeGen/X86/critical-edge-split-2.ll
new file mode 100644
index 0000000..70301cd
--- /dev/null
+++ b/test/CodeGen/X86/critical-edge-split-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type <{ %1, %1 }>
+%1 = type { i8, i8, i8, i8 }
+
+@g_2 = global %0 zeroinitializer
+@g_4 = global %1 zeroinitializer, align 4
+
+
+; PR8642
+define i16 @test1(i1 zeroext %C, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 %C, label %cond.end.i, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ br label %cond.end.i
+
+cond.end.i: ; preds = %entry
+ %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
+ ret i16 %call1
+}
+
+; CHECK: test1:
+; CHECK: testb %dil, %dil
+; CHECK: jne LBB0_2
+; CHECK: divl
+; CHECK: LBB0_2:
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
deleted file mode 100644
index 96fef0f..0000000
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
-
- %CC = type { %Register }
- %II = type { %"struct.XX::II::$_74" }
- %JITFunction = type %YYValue* (%CC*, %YYValue**)
- %YYValue = type { i32 (...)** }
- %Register = type { %"struct.XX::ByteCodeFeatures" }
- %"struct.XX::ByteCodeFeatures" = type { i32 }
- %"struct.XX::II::$_74" = type { i8* }
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define %YYValue* @loop(%CC*, %YYValue**) nounwind {
-; <label>:2
- %3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1]
- %4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2]
- %5 = load %YYValue** %4 ; <%YYValue*> [#uses=3]
- %unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1]
- br i1 %unique_1.i, label %loop, label %11
-
-loop: ; preds = %6, %2
- %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3]
- %immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1]
- br i1 %immediateCmp, label %6, label %8
-
-; <label>:6 ; preds = %loop
- %lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1]
- %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2]
- %intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1]
- %intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1]
- %intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1]
- br i1 %intAddOverflow, label %.loopexit, label %loop
-
-; <label>:8 ; preds = %loop
- ret %YYValue* inttoptr (i32 10 to %YYValue*)
-
-.loopexit: ; preds = %6
- %9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1]
- store %YYValue* %.1, %YYValue** %9
- store %YYValue* %5, %YYValue** %4
- %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
- ret %YYValue* %10
-
-; <label>:11 ; preds = %2
- %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
- ret %YYValue* %12
-}
-
-declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
new file mode 100644
index 0000000..c957d38
--- /dev/null
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+
+define i32 @test1(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cast = trunc i64 %count to i32
+ %cmp = icmp ugt i32 %cast, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test1:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: setne
+; CHECK: ret
+}
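+; Note (explanatory, not checked): ctpop(x) > 1 means x is neither zero nor a
+; power of two, i.e. (x & (x-1)) != 0, which is why the combine emits
+; leaq -1(%rdi)/testq instead of an actual population count.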
+
+
+define i32 @test2(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp ult i64 %count, 2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test2:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: sete
+; CHECK: ret
+}
+
+define i32 @test3(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cast = trunc i64 %count to i6 ; Too small for 0-64
+ %cmp = icmp ult i6 %cast, 2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test3:
+; CHECK: cmpb $2
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index 5cc6eaa..dae91d5 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
diff --git a/test/CodeGen/X86/dbg-live-in-location.ll b/test/CodeGen/X86/dbg-live-in-location.ll
new file mode 100644
index 0000000..9b1464d
--- /dev/null
+++ b/test/CodeGen/X86/dbg-live-in-location.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+@str = internal constant [3 x i8] c"Hi\00"
+
+define void @foo() nounwind ssp {
+entry:
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0))
+ ret void, !dbg !17
+}
+
+; CHECK: arg.c:5:14
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !9), !dbg !19
+ tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !10), !dbg !20
+ %cmp = icmp sgt i32 %argc, 1, !dbg !21
+ br i1 %cmp, label %cond.end, label %for.body.lr.ph, !dbg !21
+
+cond.end: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !21
+ %tmp2 = load i8** %arrayidx, align 8, !dbg !21, !tbaa !22
+ %call = tail call i32 (...)* @atoi(i8* %tmp2) nounwind, !dbg !21
+ tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !16), !dbg !21
+ tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !14), !dbg !26
+ %cmp57 = icmp sgt i32 %call, 0, !dbg !26
+ br i1 %cmp57, label %for.body.lr.ph, label %for.end, !dbg !26
+
+for.body.lr.ph: ; preds = %entry, %cond.end
+ %cond10 = phi i32 [ %call, %cond.end ], [ 300, %entry ]
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0)) nounwind
+ %inc = add nsw i32 %i.08, 1, !dbg !27
+ %exitcond = icmp eq i32 %inc, %cond10
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !26
+
+for.end: ; preds = %for.body, %cond.end
+ ret i32 0, !dbg !29
+}
+
+declare i32 @atoi(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !5}
+!llvm.dbg.lv.main = !{!9, !10, !14, !16}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"arg.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"arg.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124504)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 590081, metadata !5, metadata !"argc", metadata !1, i32 5, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 590081, metadata !5, metadata !"argv", metadata !1, i32 5, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"i", metadata !1, i32 7, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !5, i32 6, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 590080, metadata !15, metadata !"iterations", metadata !1, i32 8, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 4, i32 1, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 5, i32 14, metadata !5, null}
+!20 = metadata !{i32 5, i32 26, metadata !5, null}
+!21 = metadata !{i32 8, i32 51, metadata !15, null}
+!22 = metadata !{metadata !"any pointer", metadata !23}
+!23 = metadata !{metadata !"omnipotent char", metadata !24}
+!24 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!25 = metadata !{i32 0}
+!26 = metadata !{i32 9, i32 2, metadata !15, null}
+!27 = metadata !{i32 9, i32 30, metadata !28, null}
+!28 = metadata !{i32 589835, metadata !15, i32 9, i32 2, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 12, i32 9, metadata !15, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
new file mode 100644
index 0000000..83df147
--- /dev/null
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin8"
+
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT: .quad Lfunc_begin0
+;CHECK-NEXT: .quad Lfunc_end0
+;CHECK-NEXT: .short 1 ## Loc expr size
+;CHECK-NEXT: .byte 85 ## DW_OP_reg5
+;CHECK-NEXT: .quad 0
+;CHECK-NEXT: .quad 0
+
+%0 = type { i64, i1 }
+
+@__clz_tab = external constant [256 x i8]
+
+define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
+ tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
+ br i1 undef, label %bb2, label %bb4, !dbg !22
+
+bb2: ; preds = %entry
+ br label %bb4, !dbg !23
+
+bb4: ; preds = %bb2, %entry
+ br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24
+
+bb82.i: ; preds = %bb4
+ unreachable
+
+__udivmodti4.exit: ; preds = %bb4
+ ret i128 undef, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !9}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
+!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{metadata !12, metadata !12, metadata !12}
+!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 1093, i32 0, metadata !9, null}
+!16 = metadata !{i64 0}
+!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 1095, i32 0, metadata !18, null}
+!22 = metadata !{i32 1103, i32 0, metadata !18, null}
+!23 = metadata !{i32 1104, i32 0, metadata !18, null}
+!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
+!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 1107, i32 0, metadata !18, null}
+!27 = metadata !{i32 1111, i32 0, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
new file mode 100644
index 0000000..89bbf34
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+;CHECK: DW_TAG_inlined_subroutine
+;CHECK-NEXT: DW_AT_abstract_origin
+;CHECK-NEXT: DW_AT_low_pc
+;CHECK-NEXT: DW_AT_high_pc
+;CHECK-NEXT: DW_AT_call_file
+;CHECK-NEXT: DW_AT_call_line
+;CHECK-NEXT: DW_TAG_formal_parameter
+;CHECK-NEXT: .ascii "sp" ## DW_AT_name
+
+%struct.S1 = type { float*, i32 }
+
+@p = common global %struct.S1 zeroinitializer, align 8
+
+define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
+ tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
+ %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
+ store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
+ %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
+ %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
+ store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
+ %cmp = icmp ne float* %call, null, !dbg !29
+ %cond = zext i1 %cmp to i32, !dbg !29
+ ret i32 %cond, !dbg !29
+}
+
+declare float* @bar(i32) optsize
+
+define void @foobar() nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
+ tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
+ store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
+ %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
+ store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
+ ret void, !dbg !38
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.foo = !{!9, !18}
+!llvm.dbg.gv = !{!19}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{metadata !14, metadata !17}
+!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
+!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 7, i32 13, metadata !0, null}
+!21 = metadata !{i32 7, i32 21, metadata !0, null}
+!22 = metadata !{i32 9, i32 3, metadata !23, null}
+!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !"int", metadata !25}
+!25 = metadata !{metadata !"omnipotent char", metadata !26}
+!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!27 = metadata !{i32 10, i32 3, metadata !23, null}
+!28 = metadata !{metadata !"any pointer", metadata !25}
+!29 = metadata !{i32 11, i32 3, metadata !23, null}
+!30 = metadata !{%struct.S1* @p}
+!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
+!32 = metadata !{i32 16, i32 3, metadata !33, null}
+!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 1}
+!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
+!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
+!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
+!38 = metadata !{i32 17, i32 1, metadata !33, null}
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
new file mode 100644
index 0000000..2449046
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+;Radar 8950491
+
+;CHECK: .ascii "var" ## DW_AT_name
+;CHECK-NEXT: .byte 0
+;CHECK-NEXT: .byte 2 ## DW_AT_decl_file
+;CHECK-NEXT: .short 19509 ## DW_AT_decl_line
+;CHECK-NEXT: .long 68 ## DW_AT_type
+;CHECK-NEXT: .byte 1 ## DW_AT_location
+
+@dfm = external global i32, align 4
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
+entry:
+ call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13
+ %tmp.i = load i32* @dfm, align 4, !dbg !14
+ %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
+ br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
+
+if.end.i: ; preds = %entry
+ switch i64 %cmd, label %if.then [
+ i64 2147772420, label %bb.i
+ i64 536897538, label %bb116.i
+ ], !dbg !22
+
+bb.i: ; preds = %if.end.i
+ unreachable
+
+bb116.i: ; preds = %if.end.i
+ unreachable
+
+if.then: ; preds = %if.end.i
+ ret i32 undef, !dbg !23
+
+if.else: ; preds = %entry
+ ret i32 0
+}
+
+declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
+declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
+declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6, !7, !8}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 19509, i32 20, metadata !0, null}
+!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
+!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 19514, i32 2, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
+!23 = metadata !{i32 19524, i32 1, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
new file mode 100644
index 0000000..2985224
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+%struct.a = type { i32 }
+
+define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
+ %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
+ %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15
+ tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
+ %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
+ %add = add nsw i32 %tmp2, 1, !dbg !19
+ ret i32 %add, !dbg !19
+}
+
+declare i32 @foo(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.bar = !{!6, !11}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 5, i32 19, metadata !0, null}
+!14 = metadata !{i32 6, i32 14, metadata !12, null}
+!15 = metadata !{metadata !"int", metadata !16}
+!16 = metadata !{metadata !"omnipotent char", metadata !17}
+!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!18 = metadata !{i32 7, i32 2, metadata !12, null}
+!19 = metadata !{i32 8, i32 2, metadata !12, null}
+
+; Check that the value range of variable bar:b is appropriately truncated in the
+; debug info; here Ltmp5 is the end of the location range.
+
+;CHECK:Ltmp6
+;CHECK-NEXT: DEBUG_VALUE: bar:b <- undef
+
+;CHECK:Ldebug_loc0:
+;CHECK-NEXT: .quad Ltmp
+;CHECK-NEXT: .quad Ltmp6
+;CHECK-NEXT: .short 1
+;CHECK-NEXT: .byte 85
+;CHECK-NEXT: .quad 0
+;CHECK-NEXT: .quad 0
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
deleted file mode 100644
index f0ada41..0000000
--- a/test/CodeGen/X86/div_const.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86 | grep 365384439
-
-define i32 @f9188_mul365384439_shift27(i32 %A) {
- %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
- ret i32 %tmp1
-}
-
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
new file mode 100644
index 0000000..7ceb972
--- /dev/null
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @test1(i16 zeroext %x) nounwind {
+entry:
+ %div = udiv i16 %x, 33
+ ret i16 %div
+; CHECK: test1:
+; CHECK: imull $63551, %eax, %eax
+; CHECK-NEXT: shrl $21, %eax
+; CHECK-NEXT: ret
+}
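+; Note (explanatory, not checked): 63551 is ceil(2^21 / 33), so the
+; multiply-and-shift computes x/33 for any 16-bit x without a divide
+; (magic-number unsigned division).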
+
+define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+ %div = udiv i16 %c, 3
+ ret i16 %div
+
+; CHECK: test2:
+; CHECK: imull $43691, %eax, %eax
+; CHECK-NEXT: shrl $17, %eax
+; CHECK-NEXT: ret
+}
+
+define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+ %div = udiv i8 %c, 3
+ ret i8 %div
+
+; CHECK: test3:
+; CHECK: movzbl 8(%esp), %eax
+; CHECK-NEXT: imull $171, %eax, %eax
+; CHECK-NEXT: shrl $9, %eax
+; CHECK-NEXT: ret
+}
+
+define signext i16 @test4(i16 signext %x) nounwind {
+entry:
+ %div = sdiv i16 %x, 33 ; <i32> [#uses=1]
+ ret i16 %div
+; CHECK: test4:
+; CHECK: imull $-1985, %ecx, %ecx
+}
+
+define i32 @test5(i32 %A) nounwind {
+ %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
+ ret i32 %tmp1
+; CHECK: test5:
+; CHECK: movl $365384439, %eax
+; CHECK: mull 4(%esp)
+}
+
+define signext i16 @test6(i16 signext %x) nounwind {
+entry:
+ %div = sdiv i16 %x, 10
+ ret i16 %div
+; CHECK: test6:
+; CHECK: imull $26215, %eax, %eax
+; CHECK: shrl $31, %ecx
+; CHECK: sarl $18, %eax
+}
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
index c634c7e..9136175 100644
--- a/test/CodeGen/X86/dll-linkage.ll
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -3,7 +3,7 @@
declare dllimport void @foo()
define void @bar() nounwind {
-; CHECK: call *__imp__foo
+; CHECK: calll *__imp__foo
call void @foo()
ret void
}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 3b26319..2ecd729 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -7,7 +7,7 @@
define i32 @"$foo"() nounwind {
; CHECK: movl ($bar),
; CHECK: addl ($qux),
-; CHECK: call ($hen)
+; CHECK: calll ($hen)
%m = load i32* @"$bar"
%n = load i32* @"$qux"
%t = add i32 %m, %n
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
new file mode 100644
index 0000000..9233d3f
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; rdar://8396318
+
+; Don't emit a PIC base register if no addresses are needed.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind ssp {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %z.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ store i32 %z, i32* %z.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tmp1 = load i32* %y.addr, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ %tmp2 = load i32* %z.addr, align 4
+ %add3 = add nsw i32 %add, %tmp2
+ ret i32 %add3
+}
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 8d7dc8f..4abc3b5 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,19 +1,23 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
; PR4684
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.8"
-declare void @func2(<1 x i64>)
+declare void @func2(x86_mmx)
define void @func1() nounwind {
; This isn't spectacular, but it's MMX code at -O0...
-; CHECK: movl $2, %eax
-; CHECK: movd %rax, %mm0
-; CHECK: movd %mm0, %rdi
+; CHECK: movq2dq %mm0, %xmm0
+; For now, handling of x86_mmx parameters in fast Isel is unimplemented,
+; so we get pretty poor code. The below is preferable.
+; CHEK: movl $2, %eax
+; CHEK: movd %rax, %mm0
+; CHEK: movd %mm0, %rdi
- call void @func2(<1 x i64> <i64 2>)
+ %tmp0 = bitcast <2 x i32><i32 0, i32 2> to x86_mmx
+ call void @func2(x86_mmx %tmp0)
ret void
}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 577dd72..622a1ff 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -70,3 +70,20 @@ entry:
; X64: test4:
; X64: 128(%r{{.*}},%r{{.*}},8)
}
+
+; PR8961 - Make sure the sext for the GEP addressing comes before the load that
+; is folded.
+define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
+ %v8 = getelementptr i8* %A, i32 %I
+ %v9 = bitcast i8* %v8 to i64*
+ %v10 = load i64* %v9
+ %v11 = add i64 %B, %v10
+ ret i64 %v11
+; X64: test5:
+; X64: movslq %esi, %rax
+; X64-NEXT: movq (%rdi,%rax), %rax
+; X64-NEXT: addq %rdx, %rax
+; X64-NEXT: ret
+}
+
+
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 35ec1e7..8db1936 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN: grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN: grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
@src = external global i32
+; rdar://6653118
define i32 @loadgv() nounwind {
entry:
%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
%2 = add i32 %0, %1
store i32 %2, i32* @src
ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: movl L_src$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+; CHECK: addl (%ecx), %eax
+; CHECK: movl %eax, (%ecx)
+; CHECK: ret
+
}
%stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
entry:
store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
ret void
+; CHECK: _t:
+; CHECK: movl $0, %eax
+; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+
}
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
new file mode 100644
index 0000000..2ffcb96
--- /dev/null
+++ b/test/CodeGen/X86/fltused.ll
@@ -0,0 +1,19 @@
+; The purpose of this test is to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows, and that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 08ea77d..6966cf0 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; CHECK-NOT: {{((xor|and)ps|movd)}}
; These operations should be done in integer registers, eliminating constant
; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4bdf459..b216914 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN: grep {fucomi.*st.\[12\]}
+; RUN: llc < %s -march=x86 -mcpu=i386 | grep {fucompi.*st.\[12\]}
; PR1012
define float @foo(float* %col.2.0) {
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 9393cf5..0e65cfd 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -11,9 +11,9 @@ define void @zap(i32 %a, i32 %b) nounwind {
entry:
; CHECK: movl {{[0-9]*}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
- ; CHECK-NEXT: call addtwo
+ ; CHECK-NEXT: calll addtwo
%0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
- ; CHECK: call foo
+ ; CHECK: calll foo
call void @foo() nounwind
ret void
}
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 6d21191..39a69e1 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -15,7 +15,7 @@
; const int G2 __attribute__((weak)) = 42;
-@G2 = weak_odr constant i32 42
+@G2 = weak_odr unnamed_addr constant i32 42
; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
@@ -26,7 +26,7 @@
; int * const G3 = &G1;
-@G3 = constant i32* @G1
+@G3 = unnamed_addr constant i32* @G1
; DARWIN: .section __DATA,__const
; DARWIN: .globl _G3
@@ -41,7 +41,7 @@
; _Complex long long const G4 = 34;
-@G4 = constant {i64,i64} { i64 34, i64 0 }
+@G4 = unnamed_addr constant {i64,i64} { i64 34, i64 0 }
; DARWIN: .section __TEXT,__const
; DARWIN: _G4:
@@ -66,7 +66,7 @@
@"foo bar" = linkonce global i32 42
; LINUX: .type foo_20_bar,@object
-; LINUX:.section .gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat
; LINUX: .weak foo_20_bar
; LINUX: foo_20_bar:
@@ -76,10 +76,10 @@
; DARWIN: "_foo bar":
; PR4650
-@G6 = weak_odr constant [1 x i8] c"\01"
+@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01"
; LINUX: .type G6,@object
-; LINUX: .section .gnu.linkonce.r.G6,"a",@progbits
+; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat
; LINUX: .weak G6
; LINUX: G6:
; LINUX: .byte 1
@@ -92,7 +92,7 @@
; DARWIN: .byte 1
-@G7 = constant [10 x i8] c"abcdefghi\00"
+@G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00"
; DARWIN: __TEXT,__cstring,cstring_literals
; DARWIN: .globl _G7
@@ -108,7 +108,7 @@
; LINUX-SECTIONS: .globl G7
-@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+@G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
; DARWIN: .section __TEXT,__const
; DARWIN: .globl _G8
@@ -118,7 +118,7 @@
; LINUX: .globl G8
; LINUX:G8:
-@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+@G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
; DARWIN: .globl _G9
; DARWIN: _G9:
diff --git a/test/CodeGen/X86/inline-asm-h.ll b/test/CodeGen/X86/inline-asm-h.ll
new file mode 100644
index 0000000..53cf419
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-h.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck %s
+
+@foobar = common global i32 0, align 4
+
+define void @zed() nounwind {
+entry:
+ call void asm "movq %mm2,${0:H}", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* @foobar) nounwind
+ ret void
+}
+
+; CHECK: zed
+; CHECK: movq %mm2,foobar+8(%rip)
diff --git a/test/CodeGen/X86/inline-asm-ptr-cast.ll b/test/CodeGen/X86/inline-asm-ptr-cast.ll
new file mode 100644
index 0000000..50e3021
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu <%s
+; ModuleID = 'bug.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@func.flagmask = internal constant i64 1, align 8
+
+define void @func() nounwind {
+entry:
+ %src = alloca i32, align 4
+ %dst = alloca i32, align 4
+ %flags = alloca i64, align 8
+ %newflags = alloca i64, align 8
+ store i32 0, i32* %src, align 4
+ store i32 0, i32* %dst, align 4
+ store i64 1, i64* %flags, align 8
+ store i64 -1, i64* %newflags, align 8
+ %0 = bitcast i32* %dst to i8*
+ %tmp = load i64* %flags, align 8
+ %and = and i64 %tmp, 1
+ %1 = bitcast i32* %src to i8*
+ %tmp1 = load i8* %1
+ %2 = bitcast i32* %dst to i8*
+ %tmp2 = load i8* %2
+ call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 18aade2..3805cbb 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
; Test to check that we properly legalize an insert vector element
define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/legalize-sub-zero-2.ll b/test/CodeGen/X86/legalize-sub-zero-2.ll
new file mode 100644
index 0000000..f02ca71
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero-2.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+define fastcc void @foo(i32 %type) nounwind optsize {
+entry:
+ switch i32 %type, label %bb26 [
+ i32 33634, label %bb11
+ i32 5121, label %bb27
+ ]
+
+bb11: ; preds = %entry
+ br label %bb27
+
+bb26: ; preds = %entry
+ unreachable
+
+bb27: ; preds = %bb11, %entry
+ %srcpb.0 = phi i32 [ 1, %bb11 ], [ 0, %entry ]
+ br i1 undef, label %bb348, label %bb30.lr.ph
+
+bb30.lr.ph: ; preds = %bb27
+ %.sum743 = shl i32 %srcpb.0, 1
+ %0 = mul i32 %srcpb.0, -2
+ %.sum745 = add i32 %.sum743, %0
+ br i1 undef, label %bb70, label %bb71
+
+bb70: ; preds = %bb30.lr.ph
+ unreachable
+
+bb71: ; preds = %bb30.lr.ph
+ br i1 undef, label %bb92, label %bb80
+
+bb80: ; preds = %bb71
+ unreachable
+
+bb92: ; preds = %bb71
+ %1 = getelementptr inbounds i8* undef, i32 %.sum745
+ unreachable
+
+bb348: ; preds = %bb27
+ ret void
+}
diff --git a/test/CodeGen/X86/legalize-sub-zero.ll b/test/CodeGen/X86/legalize-sub-zero.ll
new file mode 100644
index 0000000..ee76d46
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i686-pc-win32
+
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+;target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+ %1 = fdiv <3 x double> zeroinitializer, undef
+ %2 = fdiv <2 x double> zeroinitializer, undef
+ %3 = shufflevector <2 x double> %2, <2 x double> undef, <3 x i32> <i32 0, i32
+1, i32 undef>
+ %4 = insertelement <3 x double> %3, double undef, i32 2
+ %5 = bitcast <3 x double> %1 to <3 x i64>
+ %6 = bitcast <3 x double> %4 to <3 x i64>
+ %7 = sub <3 x i64> %5, %6
+ %8 = shufflevector <3 x i64> %7, <3 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %9 = xor <2 x i64> %8, zeroinitializer
+ %10 = add nsw <2 x i64> %9, zeroinitializer
+ %11 = shufflevector <2 x i64> %10, <2 x i64> undef, <3 x i32> <i32 0, i32 1,
+i32 undef>
+ %12 = insertelement <3 x i64> %11, i64 0, i32 2
+ %13 = shufflevector <3 x i64> %12, <3 x i64> undef, <4 x i32> <i32 0, i32 1,
+i32 2, i32 3>
+ %14 = shufflevector <4 x i64> %13, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %15 = bitcast <2 x i64> %14 to <4 x i32>
+ %16 = shufflevector <4 x i32> %15, <4 x i32> undef, <4 x i32> <i32 0, i32 2,
+i32 0, i32 2>
+ %17 = bitcast <4 x i32> %16 to <2 x i64>
+ %18 = shufflevector <2 x i64> %17, <2 x i64> undef, <2 x i32> <i32 0, i32 2>
+ %19 = bitcast <2 x i64> %18 to <4 x i32>
+ %20 = shufflevector <4 x i32> %19, <4 x i32> undef, <3 x i32> <i32 0, i32 1,
+i32 2>
+ %21 = or <3 x i32> %20, zeroinitializer
+ store <3 x i32> %21, <3 x i32> addrspace(1)* undef, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 574b46a..dff6931 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
-; RUN: grep {call.*divdi3} %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 | FileCheck %s
; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +11,7 @@
define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
%div.r = sdiv <2 x i64> %num, %div
ret <2 x i64> %div.r
-}
+}
+
+; CHECK: call{{.*(divdi3|alldiv)}}
+; CHECK: call{{.*(divdi3|alldiv)}}
diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll
index 08306c2..c3d1938 100644
--- a/test/CodeGen/X86/licm-symbol.ll
+++ b/test/CodeGen/X86/licm-symbol.ll
@@ -3,7 +3,7 @@
; MachineLICM should be able to hoist the sF reference out of the loop.
; CHECK: pushl %esi
-; CHECK: subl $4, %esp
+; CHECK: pushl
; CHECK: movl $176, %esi
; CHECK: addl L___sF$non_lazy_ptr, %esi
; CHECK: .align 4, 0x90
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index 354d082..faba630 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -70,6 +70,7 @@ exit:
; Same as slightly_more_involved, but block_a is now a CFG diamond with
; fallthrough edges which should be preserved.
+; "callq block_a_merge_func" is tail duped.
; CHECK: yet_more_involved:
; CHECK: jmp .LBB2_1
@@ -78,12 +79,12 @@ exit:
; CHECK-NEXT: callq bar99
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2999, %eax
-; CHECK-NEXT: jg .LBB2_6
-; CHECK-NEXT: callq block_a_true_func
-; CHECK-NEXT: jmp .LBB2_7
-; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT: jle .LBB2_5
; CHECK-NEXT: callq block_a_false_func
-; CHECK-NEXT: .LBB2_7:
+; CHECK-NEXT: callq block_a_merge_func
+; CHECK-NEXT: jmp .LBB2_1
+; CHECK-NEXT: .LBB2_5:
+; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: callq body
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index d2ff58b..2a97629 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -353,11 +353,11 @@ return:
; CHECK: count_me_3:
; CHECK: call
-; CHECK: movsd (%r15,%r13,8), %xmm0
-; CHECK: mulsd (%r14,%r13,8), %xmm0
-; CHECK: movsd %xmm0, (%r12,%r13,8)
-; CHECK: incq %r13
-; CHECK: cmpq %r13, %rbx
+; CHECK: movsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: mulsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: movsd %xmm0, (%r{{[^,]*}},%r{{[^,]*}},8)
+; CHECK: incq %r{{.*}}
+; CHECK: cmpq %r{{.*}}, %r{{.*}}
; CHECK: jne
declare void @use(i64)
@@ -389,7 +389,7 @@ return:
; rdar://7657764
; CHECK: asd:
-; CHECK: BB9_5:
+; CHECK: BB9_4:
; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
@@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_9:
+; CHECK: BB10_7:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,6 @@ bb5: ; preds = %bb3, %entry
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index a8afdc8..e284776 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -37,3 +37,43 @@ bb3:
declare void @bar(i32*)
declare fastcc i8* @foo(%struct.s2*) nounwind
+
+; rdar://8773371
+
+declare void @printf(...) nounwind
+
+define void @commute(i32 %test_case, i32 %scale) nounwind ssp {
+; CHECK: commute:
+entry:
+ switch i32 %test_case, label %sw.bb307 [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry
+ %mul = mul nsw i32 %test_case, 3
+ %mul20 = mul nsw i32 %mul, %scale
+ br i1 undef, label %if.end34, label %sw.bb307
+
+if.end34: ; preds = %sw.bb
+; CHECK: %if.end34
+; CHECK: imull
+; CHECK: leal
+; CHECK-NOT: imull
+ tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
+ %tmp = mul i32 %scale, %test_case
+ %tmp752 = mul i32 %tmp, 3
+ %tmp753 = zext i32 %tmp752 to i64
+ br label %bb.nph743.us
+
+for.body53.us: ; preds = %bb.nph743.us, %for.body53.us
+ %exitcond = icmp eq i64 undef, %tmp753
+ br i1 %exitcond, label %bb.nph743.us, label %for.body53.us
+
+bb.nph743.us: ; preds = %for.body53.us, %if.end34
+ br label %for.body53.us
+
+sw.bb307: ; preds = %sw.bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index b90d2e2..36be1f3 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -20,8 +20,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp2:
-; CHECK: movw (%rsi), %ax
-; CHECK: cmpw %ax, (%rdi)
+; CHECK: movw (%rdi), %ax
+; CHECK: cmpw (%rsi), %ax
}
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
@@ -54,8 +54,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp4:
-; CHECK: movl (%rsi), %eax
-; CHECK: cmpl %eax, (%rdi)
+; CHECK: movl (%rdi), %eax
+; CHECK: cmpl (%rsi), %eax
}
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
@@ -87,8 +87,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp8:
-; CHECK: movq (%rsi), %rax
-; CHECK: cmpq %rax, (%rdi)
+; CHECK: movq (%rdi), %rax
+; CHECK: cmpq (%rsi), %rax
}
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 7bc31be..72342cb 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -9,8 +10,8 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
ret i8* %a
-; CHECK: test1:
-; CHECK: memcpy
+; LINUX: test1:
+; LINUX: memcpy
}
; Variable memcpy's should lower to calls.
@@ -21,18 +22,41 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
ret i8* %tmp14
-; CHECK: test2:
-; CHECK: memcpy
+; LINUX: test2:
+; LINUX: memcpy
}
; Large constant memcpy's should lower to a call when optimizing for size.
; PR6623
+
+; On the other hand, Darwin's definition of -Os is to optimize for size without
+; hurting performance, so it should just ignore optsize when expanding memcpy.
+; rdar://8821501
define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test3:
-; CHECK: memcpy
+; LINUX: test3:
+; LINUX: memcpy
+
+; DARWIN: test3:
+; DARWIN-NOT: memcpy
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
}
; Large constant memcpy's should be inlined when not optimizing for size.
@@ -40,18 +64,18 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test4:
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
+; LINUX: test4:
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
}
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
deleted file mode 100644
index d405068..0000000
--- a/test/CodeGen/X86/memmove-0.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
deleted file mode 100644
index 2057be8..0000000
--- a/test/CodeGen/X86/memmove-1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
deleted file mode 100644
index 68a9f4d..0000000
--- a/test/CodeGen/X86/memmove-2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
deleted file mode 100644
index d8a419c..0000000
--- a/test/CodeGen/X86/memmove-3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 0e15595..993583b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=i386-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
define fastcc void @t1() nounwind {
entry:
; CHECK: t1:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
unreachable
}
@@ -13,7 +13,27 @@ entry:
define fastcc void @t2(i8 signext %c) nounwind {
entry:
; CHECK: t2:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
unreachable
}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t3(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
+ ret void
+; CHECK: t3:
+; CHECK: imull $16843009
+}
+
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+ ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index c0cd271..3f069b4 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movaps | count 5
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 7dcd84d..ded4b73 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -6,7 +6,7 @@ target triple = "i386-pc-mingw32"
define void @foo1(i32 %N) nounwind {
entry:
; CHECK: _foo1:
-; CHECK: call __alloca
+; CHECK: calll __alloca
%tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1]
call void @bar1( i32* %tmp14 )
ret void
@@ -19,7 +19,7 @@ entry:
; CHECK: _foo2:
; CHECK: andl $-16, %esp
; CHECK: pushl %eax
-; CHECK: call __alloca
+; CHECK: calll __alloca
; CHECK: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/misaligned-memset.ll b/test/CodeGen/X86/misaligned-memset.ll
new file mode 100644
index 0000000..21f8bf2
--- /dev/null
+++ b/test/CodeGen/X86/misaligned-memset.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=nehalem < %s | FileCheck %s
+
+@a = common global [3 x i64] zeroinitializer, align 16
+
+define i32 @main() nounwind ssp {
+; CHECK: movups
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 426e98e..b348512 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
-; On Darwin x86-32, v1i64 values are passed in memory.
+; On Darwin x86-32, v1i64 values are passed in memory. In this example, they
+; are never moved into an MM register at all.
; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
-@u1 = external global <8 x i8>
+@u1 = external global x86_mmx
-define void @t1(<8 x i8> %v1) nounwind {
- store <8 x i8> %v1, <8 x i8>* @u1, align 8
+define void @t1(x86_mmx %v1) nounwind {
+ store x86_mmx %v1, x86_mmx* @u1, align 8
ret void
}
-@u2 = external global <1 x i64>
+@u2 = external global x86_mmx
define void @t2(<1 x i64> %v1) nounwind {
- store <1 x i64> %v1, <1 x i64>* @u2, align 8
+ %tmp = bitcast <1 x i64> %v1 to x86_mmx
+ store x86_mmx %tmp, x86_mmx* @u2, align 8
ret void
}
+
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index c42af08..c132d31 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; Since the add is not an MMX add, we don't have a movq2dq any more.
@g_v8qi = external global <8 x i8>
define void @t1() nounwind {
%tmp3 = load <8 x i8>* @g_v8qi, align 8
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
-define void @t2(<8 x i8> %v1, <8 x i8> %v2) nounwind {
- %tmp3 = add <8 x i8> %v1, %v2
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind {
+ %v1a = bitcast x86_mmx %v1 to <8 x i8>
+ %v2b = bitcast x86_mmx %v2 to <8 x i8>
+ %tmp3 = add <8 x i8> %v1a, %v2b
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index e4dfdbf..6817487 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,131 +1,309 @@
; RUN: llc < %s -march=x86 -mattr=+mmx
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
+;; First is a straight translation of the original with bitcasts as needed.
-define void @foo(<8 x i8>* %A, <8 x i8>* %B) {
+define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <8 x i8>* %A ; <<8 x i8>> [#uses=1]
- %tmp3 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp4 = add <8 x i8> %tmp1, %tmp3 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp4, <8 x i8>* %A
- %tmp7 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp12 = tail call <8 x i8> @llvm.x86.mmx.padds.b( <8 x i8> %tmp4, <8 x i8> %tmp7 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp12, <8 x i8>* %A
- %tmp16 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp21 = tail call <8 x i8> @llvm.x86.mmx.paddus.b( <8 x i8> %tmp12, <8 x i8> %tmp16 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp21, <8 x i8>* %A
- %tmp27 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp28 = sub <8 x i8> %tmp21, %tmp27 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp28, <8 x i8>* %A
- %tmp31 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp36 = tail call <8 x i8> @llvm.x86.mmx.psubs.b( <8 x i8> %tmp28, <8 x i8> %tmp31 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp36, <8 x i8>* %A
- %tmp40 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp45 = tail call <8 x i8> @llvm.x86.mmx.psubus.b( <8 x i8> %tmp36, <8 x i8> %tmp40 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp45, <8 x i8>* %A
- %tmp51 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp52 = mul <8 x i8> %tmp45, %tmp51 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp52, <8 x i8>* %A
- %tmp57 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp58 = and <8 x i8> %tmp52, %tmp57 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp58, <8 x i8>* %A
- %tmp63 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp64 = or <8 x i8> %tmp58, %tmp63 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp64, <8 x i8>* %A
- %tmp69 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp70 = xor <8 x i8> %tmp64, %tmp69 ; <<8 x i8>> [#uses=1]
- store <8 x i8> %tmp70, <8 x i8>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
+ %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+ %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
+ %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
+ %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+ %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
+ %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
+ %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+ %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
+ %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
+ store x86_mmx %tmp58a, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+ %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
+ %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
+ store x86_mmx %tmp64a, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
+ %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+ %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
+ %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
+ store x86_mmx %tmp70a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
+define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <2 x i32>* %A ; <<2 x i32>> [#uses=1]
- %tmp3 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp4 = add <2 x i32> %tmp1, %tmp3 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp4, <2 x i32>* %A
- %tmp9 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp10 = sub <2 x i32> %tmp4, %tmp9 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp10, <2 x i32>* %A
- %tmp15 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp16 = mul <2 x i32> %tmp10, %tmp15 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp16, <2 x i32>* %A
- %tmp21 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp22 = and <2 x i32> %tmp16, %tmp21 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp22, <2 x i32>* %A
- %tmp27 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp28 = or <2 x i32> %tmp22, %tmp27 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp28, <2 x i32>* %A
- %tmp33 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp34 = xor <2 x i32> %tmp28, %tmp33 ; <<2 x i32>> [#uses=1]
- store <2 x i32> %tmp34, <2 x i32>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
+ %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+ %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
+ %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+ %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
+ %tmp10a = bitcast <2 x i32> %tmp10 to x86_mmx
+ store x86_mmx %tmp10a, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
+ %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+ %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
+ %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp22a, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
+ %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+ %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
+ %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
+ %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+ %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
+ %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
+ store x86_mmx %tmp34a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @bar(<4 x i16>* %A, <4 x i16>* %B) {
+define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <4 x i16>* %A ; <<4 x i16>> [#uses=1]
- %tmp3 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp4 = add <4 x i16> %tmp1, %tmp3 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp4, <4 x i16>* %A
- %tmp7 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp12 = tail call <4 x i16> @llvm.x86.mmx.padds.w( <4 x i16> %tmp4, <4 x i16> %tmp7 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp12, <4 x i16>* %A
- %tmp16 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp21 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp12, <4 x i16> %tmp16 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp21, <4 x i16>* %A
- %tmp27 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp28 = sub <4 x i16> %tmp21, %tmp27 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp28, <4 x i16>* %A
- %tmp31 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp36 = tail call <4 x i16> @llvm.x86.mmx.psubs.w( <4 x i16> %tmp28, <4 x i16> %tmp31 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp36, <4 x i16>* %A
- %tmp40 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp45 = tail call <4 x i16> @llvm.x86.mmx.psubus.w( <4 x i16> %tmp36, <4 x i16> %tmp40 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp45, <4 x i16>* %A
- %tmp51 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp52 = mul <4 x i16> %tmp45, %tmp51 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp52, <4 x i16>* %A
- %tmp55 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp60 = tail call <4 x i16> @llvm.x86.mmx.pmulh.w( <4 x i16> %tmp52, <4 x i16> %tmp55 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp60, <4 x i16>* %A
- %tmp64 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp69 = tail call <2 x i32> @llvm.x86.mmx.pmadd.wd( <4 x i16> %tmp60, <4 x i16> %tmp64 ) ; <<2 x i32>> [#uses=1]
- %tmp70 = bitcast <2 x i32> %tmp69 to <4 x i16> ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp70, <4 x i16>* %A
- %tmp75 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp76 = and <4 x i16> %tmp70, %tmp75 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp76, <4 x i16>* %A
- %tmp81 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp82 = or <4 x i16> %tmp76, %tmp81 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp82, <4 x i16>* %A
- %tmp87 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp88 = xor <4 x i16> %tmp82, %tmp87 ; <<4 x i16>> [#uses=1]
- store <4 x i16> %tmp88, <4 x i16>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
+ %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+ %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
+ %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
+ %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+ %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
+ %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
+ %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+ %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
+ %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
+ %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+ %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
+ %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
+ store x86_mmx %tmp76a, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
+ %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+ %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
+ %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
+ store x86_mmx %tmp82a, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
+ %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+ %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
+ %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
+ store x86_mmx %tmp88a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>)
+;; The following is modified to use MMX intrinsics everywhere they work.
-declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>)
+define void @fooa(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp51a = bitcast x86_mmx %tmp51 to i64
+ %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
+ %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <x86_mmx> [#uses=2]
+ %tmp52a = bitcast <8 x i8> %tmp52 to i64
+ %tmp52aa = bitcast i64 %tmp52a to x86_mmx
+ store x86_mmx %tmp52aa, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp58, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp64, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>)
+define void @baza(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp10, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <x86_mmx> [#uses=2]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp22, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp34, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>)
+define void @bara(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp52, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp76, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp82, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp88, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index 1fd8f67..8b1840a 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,26 +1,31 @@
; RUN: llc < %s -march=x86-64 | grep movd | count 4
-define i64 @foo(<1 x i64>* %p) {
- %t = load <1 x i64>* %p
- %u = add <1 x i64> %t, %t
- %s = bitcast <1 x i64> %u to i64
+define i64 @foo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @goo(<2 x i32>* %p) {
- %t = load <2 x i32>* %p
- %u = add <2 x i32> %t, %t
- %s = bitcast <2 x i32> %u to i64
+define i64 @goo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @hoo(<4 x i16>* %p) {
- %t = load <4 x i16>* %p
- %u = add <4 x i16> %t, %t
- %s = bitcast <4 x i16> %u to i64
+define i64 @hoo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @ioo(<8 x i8>* %p) {
- %t = load <8 x i8>* %p
- %u = add <8 x i8> %t, %t
- %s = bitcast <8 x i8> %u to i64
+define i64 @ioo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
new file mode 100644
index 0000000..3ac0e4e
--- /dev/null
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -0,0 +1,1324 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+
+declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckldq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhdq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packuswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packssdw
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packsswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pxor
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: por
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pandn
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pand
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psadbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
+
+define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
+; CHECK: movntq
+entry:
+ %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
+ ret void
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
+
+define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pmovmskb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
+ %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
+ ret i32 %1
+}
+
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
+
+define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
+; CHECK: maskmovq
+entry:
+ %0 = bitcast <1 x i64> %n to <8 x i8>
+ %1 = bitcast <1 x i64> %d to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhuw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pshufw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmuludq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpi2pd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
+ ret <2 x double> %2
+}
+
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvttpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
+
+define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: palignr
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
+
+define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
+
+define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
+
+define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pshufb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhrsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index a063ee1..348dac8 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
+; This is not an MMX operation; it is promoted to XMM.
-define <2 x i32> @qux(i32 %A) nounwind {
+define x86_mmx @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp3
+ %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
+ ret x86_mmx %tmp4
}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 3af09f4..6062b50 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 0af7e01..689f7bf 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; There are no MMX operations in bork; it is promoted to XMM.
define void @bork(<1 x i64>* %x) {
+; CHECK: bork
+; CHECK: pextrd
entry:
%tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
@@ -11,4 +14,18 @@ entry:
ret void
}
+; pork uses MMX.
+
+define void @pork(x86_mmx* %x) {
+; CHECK: pork
+; CHECK: punpckhdq
+entry:
+ %tmp2 = load x86_mmx* %x ; <x86_mmx> [#uses=1]
+ %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2)
+ store x86_mmx %tmp9, x86_mmx* %x
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index dd0aa2c..bafc754 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -5,28 +5,28 @@
define i64 @t1(<1 x i64> %mm1) nounwind {
entry:
- %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1]
- %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1]
+ %tmp = bitcast <1 x i64> %mm1 to x86_mmx
+ %tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 ) ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp6 to i64
ret i64 %retval1112
}
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
-define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind {
+define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind {
entry:
- %tmp7 = tail call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %mm1, <2 x i32> %mm2 ) nounwind readnone ; <<2 x i32>> [#uses=1]
- %retval1112 = bitcast <2 x i32> %tmp7 to i64 ; <i64> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp7 to i64
ret i64 %retval1112
}
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
-define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
+define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind {
entry:
- %tmp6 = bitcast <1 x i64> %mm1 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone ; <<4 x i16>> [#uses=1]
- %retval1314 = bitcast <4 x i16> %tmp8 to i64 ; <i64> [#uses=1]
+ %tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1314 = bitcast x86_mmx %tmp8 to i64
ret i64 %retval1314
}
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index e3125c7..9f7501e 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -22,8 +22,10 @@ entry:
%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16> ; <<4 x i16>> [#uses=1]
%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 > ; <<4 x i16>> [#uses=1]
%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> %tmp555, i8* null )
+ %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
+ %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 8253c20..a7ce7d9 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
%struct.vS1024 = type { [8 x <4 x i32>] }
%struct.vS512 = type { [4 x <4 x i32>] }
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
define void @t() nounwind {
entry:
@@ -12,14 +12,18 @@ entry:
bb554: ; preds = %bb554, %entry
%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ] ; <<1 x i64>> [#uses=1]
- %0 = load <1 x i64>* null, align 8 ; <<1 x i64>> [#uses=2]
- %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %0 = load x86_mmx* null, align 8 ; <<1 x i64>> [#uses=2]
+ %1 = bitcast x86_mmx %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
- %2 = bitcast <2 x i32> %tmp555 to <1 x i64> ; <<1 x i64>> [#uses=1]
- %3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
+ %2 = bitcast <2 x i32> %tmp555 to x86_mmx ; <<1 x i64>> [#uses=1]
+ %3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
- %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2 ; <<1 x i64>> [#uses=1]
- %4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
- %tmp562 = add <1 x i64> %4, %3 ; <<1 x i64>> [#uses=1]
+ %tmp3 = bitcast x86_mmx %2 to <1 x i64>
+ %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3 ; <<1 x i64>> [#uses=1]
+ %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
+ %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
+ %tmp6 = bitcast x86_mmx %4 to <1 x i64>
+ %tmp7 = bitcast x86_mmx %3 to <1 x i64>
+ %tmp562 = add <1 x i64> %tmp6, %tmp7 ; <<1 x i64>> [#uses=1]
br label %bb554
}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index d21e240..191e261 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
+; There are no MMX operations here; this is promoted to XMM.
define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index b04048b..00190e8 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,8 +1,57 @@
-; RUN: llc < %s -march=x86 | grep gs
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
-define i32 @foo() nounwind readonly {
+define i32 @test1() nounwind readonly {
entry:
%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
+; X32: test1:
+; X32: movl %gs:196, %eax
+; X32: movl (%eax), %eax
+; X32: ret
+
+; X64: test1:
+; X64: movq %gs:320, %rax
+; X64: movl (%rax), %eax
+; X64: ret
+
+define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
+entry:
+ %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+ tail call void %tmp9(i8* undef) nounwind optsize
+ ret i64 0
+}
+
+; rdar://8453210
+; X32: test2:
+; X32: movl {{.*}}(%esp), %eax
+; X32: calll *%gs:(%eax)
+
+; X64: test2:
+; X64: callq *%gs:(%rdi)
+
+
+
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+ %0 = load i64 addrspace(256)* %p
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/mult-alt-generic-i686.ll b/test/CodeGen/X86/mult-alt-generic-i686.ll
new file mode 100644
index 0000000..7c3499f
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
new file mode 100644
index 0000000..f35bb5e
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86-64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
new file mode 100644
index 0000000..06175da
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -0,0 +1,358 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; ModuleID = 'mult-alt-x86.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@dout0 = common global double 0.000000e+000, align 8
+@din1 = common global double 0.000000e+000, align 8
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_f() nounwind {
+entry:
+ ret void
+}
+
+define void @single_t() nounwind {
+entry:
+ ret void
+}
+
+define void @single_u() nounwind {
+entry:
+ ret void
+}
+
+define void @single_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @single_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,I,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,J,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,K,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*m,L,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*m,M,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,N,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*m,G,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*m,C,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,e,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,Z,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_f() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_t() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_u() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|I|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|J|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|K|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*r|m|m,r|L|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*r|m|m,r|M|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|N|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*r|m|m,r|G|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*r|m|m,r|C|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|e|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|Z|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
new file mode 100644
index 0000000..ef27cbc
--- /dev/null
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; DAGCombiner should fold this code in finite time.
+; rdar://8606584
+
+define void @test1() nounwind readnone {
+bb.nph:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %bb.nph
+ %tmp6 = load i32* undef, align 4
+ %and = or i64 undef, undef
+ %conv11 = zext i32 undef to i64
+ %conv14 = zext i32 %tmp6 to i64
+ %shl15 = shl i64 %conv14, 1
+ %shl15.masked = and i64 %shl15, 4294967294
+ %and17 = or i64 %shl15.masked, %conv11
+ %add = add i64 %and17, 1
+ %xor = xor i64 %add, %and
+ %tmp20 = load i64* undef, align 8
+ %add21 = add i64 %xor, %tmp20
+ %conv22 = trunc i64 %add21 to i32
+ store i32 %conv22, i32* undef, align 4
+ br i1 false, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret void
+}
+
+
+; DAGCombiner shouldn't fold the sdiv (ashr) away.
+; rdar://8636812
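+; (sdiv by 2 rounds toward zero while an arithmetic shift rounds toward
+; negative infinity, e.g. -3 sdiv 2 = -1 but -3 ashr 1 = -2, so the divide of
+; the sign-extended value below cannot be replaced by a bare shift.)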
+; CHECK: test2:
+; CHECK: sarl
+
+define i32 @test2() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i8, align 1
+ store i32 127, i32* %i, align 4
+ store i8 0, i8* %j, align 1
+ %tmp3 = load i32* %i, align 4
+ %mul = mul nsw i32 %tmp3, 2
+ %conv4 = trunc i32 %mul to i8
+ %conv5 = sext i8 %conv4 to i32
+ %div6 = sdiv i32 %conv5, 2
+ %conv7 = trunc i32 %div6 to i8
+ %conv9 = sext i8 %conv7 to i32
+ %cmp = icmp eq i32 %conv9, -1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret i32 0
+
+if.end: ; preds = %entry
+ call void @abort() noreturn
+ unreachable
+}
+
+declare void @abort() noreturn
+
+declare void @exit(i32) noreturn
+
+; DAGCombiner can't fold this into a load of just byte 1.
+; PR8757
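+; (After the store, byte 0 of %P is 0x80 and byte 1 is 0x00; the shift sequence
+; below produces 255 from the sign of byte 0, so rewriting it as a plain load
+; of byte 1 would wrongly return 0.)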
+define i32 @test3(i32 *%P) nounwind ssp {
+ volatile store i32 128, i32* %P
+ %tmp4.pre = load i32* %P
+ %phitmp = trunc i32 %tmp4.pre to i16
+ %phitmp13 = shl i16 %phitmp, 8
+ %phitmp14 = ashr i16 %phitmp13, 8
+ %phitmp15 = lshr i16 %phitmp14, 8
+ %phitmp16 = zext i16 %phitmp15 to i32
+ ret i32 %phitmp16
+
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 7842eb8..76e557b 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
-; RUN: not egrep {addsd|subsd|xor}
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | FileCheck %s
+; CHECK-NOT: {{addsd|subsd|xor}}
declare double @sin(double %f)
diff --git a/test/CodeGen/X86/non-globl-eh-frame.ll b/test/CodeGen/X86/non-globl-eh-frame.ll
new file mode 100644
index 0000000..71349ec
--- /dev/null
+++ b/test/CodeGen/X86/non-globl-eh-frame.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin10 -march x86 | not grep {{.globl\[\[:space:\]\]*__Z4funcv.eh}}
+; RUN: llc < %s -mtriple x86_64-apple-darwin9 -march x86 | FileCheck %s -check-prefix=DARWIN9
+
+%struct.__pointer_type_info_pseudo = type { %struct.__type_info_pseudo, i32, %"struct.std::type_info"* }
+%struct.__type_info_pseudo = type { i8*, i8* }
+%"struct.std::type_info" = type opaque
+
+@.str = private constant [12 x i8] c"hello world\00", align 1
+@_ZTIPc = external constant %struct.__pointer_type_info_pseudo
+
+define void @_Z4funcv() noreturn optsize ssp {
+entry:
+ %0 = tail call i8* @__cxa_allocate_exception(i64 8) nounwind
+ %1 = bitcast i8* %0 to i8**
+ store i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0), i8** %1, align 8
+ tail call void @__cxa_throw(i8* %0, i8* bitcast (%struct.__pointer_type_info_pseudo* @_ZTIPc to i8*), void (i8*)* null) noreturn
+ unreachable
+}
+
+; DARWIN9: .globl __Z4funcv.eh
+
+declare i8* @__cxa_allocate_exception(i64) nounwind
+
+declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 8bed624..ef02af2 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
; PR1296
-; RUN: llc < %s -march=x86 | grep {movl \$1} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 23c509c..13e804d 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 4
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index a1a9759..dc5fcd7 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -12,7 +12,7 @@ entry:
ret void
; LINUX: test0:
-; LINUX: call .L0$pb
+; LINUX: calll .L0$pb
; LINUX-NEXT: .L0$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb),
@@ -34,7 +34,7 @@ entry:
ret void
; LINUX: test1:
-; LINUX: call .L1$pb
+; LINUX: calll .L1$pb
; LINUX-NEXT: .L1$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax
@@ -54,12 +54,12 @@ entry:
; LINUX: test2:
; LINUX: pushl %ebx
; LINUX-NEXT: subl $8, %esp
-; LINUX-NEXT: call .L2$pb
+; LINUX-NEXT: calll .L2$pb
; LINUX-NEXT: .L2$pb:
; LINUX-NEXT: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx
; LINUX: movl $40, (%esp)
-; LINUX: call malloc@PLT
+; LINUX: calll malloc@PLT
; LINUX: addl $8, %esp
; LINUX: popl %ebx
; LINUX: ret
@@ -75,13 +75,13 @@ entry:
call void(...)* %tmp1()
ret void
; LINUX: test3:
-; LINUX: call .L3$pb
+; LINUX: calll .L3$pb
; LINUX-NEXT: .L3$pb:
; LINUX: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
; LINUX: movl pfoo@GOT(%[[REG3]]),
-; LINUX: call afoo@PLT
-; LINUX: call *
+; LINUX: calll afoo@PLT
+; LINUX: calll *
}
declare void(...)* @afoo(...)
@@ -91,10 +91,10 @@ entry:
call void(...)* @foo()
ret void
; LINUX: test4:
-; LINUX: call .L4$pb
+; LINUX: calll .L4$pb
; LINUX: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
-; LINUX: call foo@PLT
+; LINUX: calll foo@PLT
}
declare void @foo(...)
@@ -112,7 +112,7 @@ entry:
ret void
; LINUX: test5:
-; LINUX: call .L5$pb
+; LINUX: calll .L5$pb
; LINUX-NEXT: .L5$pb:
; LINUX-NEXT: popl %eax
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax
@@ -134,7 +134,7 @@ entry:
; LINUX: .LCPI6_0:
; LINUX: test6:
-; LINUX: call .L6$pb
+; LINUX: calll .L6$pb
; LINUX: .L6$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb),
; LINUX: fldl .LCPI6_0@GOTOFF(
@@ -186,7 +186,7 @@ bb12:
ret void
; LINUX: test7:
-; LINUX: call .L7$pb
+; LINUX: calll .L7$pb
; LINUX: .L7$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
; LINUX: .LJTI7_0@GOTOFF(
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 31071bc..b6761e3 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | grep -F .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll
new file mode 100644
index 0000000..430214c
--- /dev/null
+++ b/test/CodeGen/X86/popcnt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s
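+; The ctpop intrinsics should lower to the hardware popcnt instruction; x86 has
+; no byte-sized popcnt, so the i8 case is promoted and checked as popcntw.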
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+ %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %cnt
+; CHECK: cnt8:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+ %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+ ret i16 %cnt
+; CHECK: cnt16:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: cnt32:
+; CHECK: popcntl
+; CHECK: ret
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: cnt64:
+; CHECK: popcntq
+; CHECK: ret
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 97cc7b4..902c69b 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -68,7 +68,7 @@ bb26.preheader: ; preds = %imix_test.exit
bb23: ; preds = %imix_test.exit
unreachable
-; X86-32: %bb26.preheader.bb28_crit_edge
+; X86-32: %bb26.preheader
; X86-32: movl -16(%ebp),
; X86-32-NEXT: .align 4
; X86-32-NEXT: %bb28
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index e5daf5d..54d043d 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
; PR2659
@@ -14,10 +14,11 @@ forcond.preheader: ; preds = %entry
%cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1]
br i1 %cmp44, label %afterfor, label %forbody
-; CHECK: %forcond.preheader.forbody_crit_edge
+; CHECK: %forcond.preheader
; CHECK: movl $1
; CHECK-NOT: xorl
-; CHECK-NEXT: movl
+; CHECK-NOT: movl
+; CHECK-NEXT: je
ifthen: ; preds = %entry
ret i32 0
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 7cdeaa0..da16237 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
new file mode 100644
index 0000000..45b0c6c
--- /dev/null
+++ b/test/CodeGen/X86/pr9127.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+define i8 @foobar(double %d, double* %x) {
+entry:
+ %tmp2 = load double* %x, align 8
+ %cmp = fcmp oeq double %tmp2, %d
+ %conv3 = zext i1 %cmp to i8
+ ret i8 %conv3
+}
+
+; Test that the load is folded into the ucomisd.
+; CHECK: ucomisd (%rdi), %xmm0
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index fac5915..48d2673 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse > %t
-; RUN: grep prefetchnta %t
-; RUN: grep prefetcht0 %t
-; RUN: grep prefetcht1 %t
-; RUN: grep prefetcht2 %t
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
define void @t(i8* %ptr) nounwind {
entry:
+; CHECK: prefetcht2
+; CHECK: prefetcht1
+; CHECK: prefetcht0
+; CHECK: prefetchnta
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index 276f8bb..9291200 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -8,14 +8,14 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
-@a = internal constant [2 x i32] [i32 1, i32 2]
-@a1 = constant [2 x i32] [i32 1, i32 2]
-@e = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@e1 = constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@p = constant i8* bitcast ([2 x i32]* @a to i8*)
-@t = constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
-@p1 = constant i8* bitcast ([2 x i32]* @a1 to i8*)
-@t1 = constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
+@a = internal unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@a1 = unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@e = internal unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@e1 = unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@p = unnamed_addr constant i8* bitcast ([2 x i32]* @a to i8*)
+@t = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
+@p1 = unnamed_addr constant i8* bitcast ([2 x i32]* @a1 to i8*)
+@t1 = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
@p2 = internal global i8* bitcast([2 x i32]* @a1 to i8*)
@t2 = internal global i8* bitcast([2 x [2 x i32]]* @e1 to i8*)
@p3 = internal global i8* bitcast([2 x i32]* @a to i8*)
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index 77f320f..adc58ac 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-mmx -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; Verify that when widening a divide/remainder operation, we generate only one
; divide/rem per element, since divide/remainder can trap.
diff --git a/test/CodeGen/X86/select-aggregate.ll b/test/CodeGen/X86/select-aggregate.ll
deleted file mode 100644
index 44cafe2..0000000
--- a/test/CodeGen/X86/select-aggregate.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-; PR5757
-
-; CHECK: cmovneq %rdi, %rsi
-; CHECK: movl (%rsi), %eax
-
-%0 = type { i64, i32 }
-
-define i32 @foo(%0* %p, %0* %q, i1 %r) nounwind {
- %t0 = load %0* %p
- %t1 = load %0* %q
- %t4 = select i1 %r, %0 %t0, %0 %t1
- %t5 = extractvalue %0 %t4, 1
- ret i32 %t5
-}
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
deleted file mode 100644
index c38a020..0000000
--- a/test/CodeGen/X86/select-zero-one.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
-
-@r1 = weak global i32 0
-
-define void @t1(i32 %a, double %b) {
- %tmp114 = fcmp ugt double %b, 1.000000e-09
- %tmp120 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp114, %tmp120 ; <i1> [#uses=1]
- %storemerge = select i1 %bothcond, i32 0, i32 1 ; <i32> [#uses=2]
- store i32 %storemerge, i32* @r1, align 4
- ret void
-}
-
-@r2 = weak global i8 0
-
-define void @t2(i32 %a, double %b) {
- %tmp114 = fcmp ugt double %b, 1.000000e-09
- %tmp120 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp114, %tmp120 ; <i1> [#uses=1]
- %storemerge = select i1 %bothcond, i8 0, i8 1 ; <i32> [#uses=2]
- store i8 %storemerge, i8* @r2, align 4
- ret void
-}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 95ed9e9..ce04e07 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,63 +1,220 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium
-; RUN: llc < %s -march=x86 -mcpu=yonah
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep set
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; PR5757
-define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
- %X = select i1 %A, i1 %B, i1 %C ; <i1> [#uses=1]
- ret i1 %X
+%0 = type { i64, i32 }
+
+define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
+ %t0 = load %0* %p
+ %t1 = load %0* %q
+ %t4 = select i1 %r, %0 %t0, %0 %t1
+ %t5 = extractvalue %0 %t4, 1
+ ret i32 %t5
+; CHECK: test1:
+; CHECK: cmovneq %rdi, %rsi
+; CHECK: movl (%rsi), %eax
+}
+
+
+; PR2139
+define i32 @test2() nounwind {
+entry:
+ %tmp73 = tail call i1 @return_false() ; <i8> [#uses=1]
+ %g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2]
+ %tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1]
+ %tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2]
+ %tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1]
+ br i1 %tmp87, label %bb90, label %bb91
+bb90: ; preds = %bb84, %bb72
+ unreachable
+bb91: ; preds = %bb84
+ ret i32 0
+; CHECK: test2:
+; CHECK: movnew
+; CHECK: movswl
+}
+
+declare i1 @return_false()
+
+
+;; Select between two floating point constants.
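+;; (Both constants are emitted to the constant pool, and the select lowers to a
+;; single scaled-index movss load, which the CHECK below matches.)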
+define float @test3(i32 %x) nounwind readnone {
+entry:
+ %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
+ ret float %iftmp.0.0
+; CHECK: test3:
+; CHECK: movss {{.*}},4), %xmm0
+}
+
+define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+ %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
+ %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
+ %2 = load i8* %1, align 1 ; <i8> [#uses=1]
+ ret i8 %2
+; CHECK: test4:
+; CHECK: movsbl ({{.*}},4), %eax
+}
+
+define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
+ %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
+ store <2 x i16> %x, <2 x i16>* %p
+ ret void
+; CHECK: test5:
}
-define i8 @byteSel(i1 %A, i8 %B, i8 %C) nounwind {
- %X = select i1 %A, i8 %B, i8 %C ; <i8> [#uses=1]
- ret i8 %X
+define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
+ %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
+ %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
+ store <4 x float> %iftmp.38.0, <4 x float>* %A
+ ret void
+; Verify that the fmul gets sunk into the one part of the diamond where it is
+; needed.
+; CHECK: test6:
+; CHECK: jne
+; CHECK: mulps
+; CHECK: ret
+; CHECK: ret
}
-define i16 @shortSel(i1 %A, i16 %B, i16 %C) nounwind {
- %X = select i1 %A, i16 %B, i16 %C ; <i16> [#uses=1]
- ret i16 %X
+; Select between x86_fp80 constants.
+define x86_fp80 @test7(i32 %tmp8) nounwind {
+ %tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1]
+ %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
+ ret x86_fp80 %retval
+; CHECK: test7:
+; CHECK: leaq
+; CHECK: fldt (%r{{.}}x,%r{{.}}x)
}
-define i32 @intSel(i1 %A, i32 %B, i32 %C) nounwind {
- %X = select i1 %A, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X
+; Widening select of v6i32 followed by a sub.
+define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
+ %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
+ %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ store <6 x i32> %val, <6 x i32>* %dst.addr
+ ret void
+
+; CHECK: test8:
}
-define i64 @longSel(i1 %A, i64 %B, i64 %C) nounwind {
- %X = select i1 %A, i64 %B, i64 %C ; <i64> [#uses=1]
- ret i64 %X
+
+;; Test integer select between values and constants.
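+;; ("cmpq $1, %rdi" sets the carry flag exactly when %rdi is zero, and
+;; "sbbq %rax, %rax" then materializes -1 or 0 from that flag without a branch;
+;; or'ing in %rsi completes the selects checked below.)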
+
+define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp ne i64 %x, 0
+ %cond = select i1 %cmp, i64 %y, i64 -1
+ ret i64 %cond
+; CHECK: test9:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
+}
+
+;; Same as test9
+define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 %y
+ ret i64 %cond
+; CHECK: test9a:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
+}
+
+define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %A = sext i1 %cmp to i64
+ %cond = or i64 %y, %A
+ ret i64 %cond
+; CHECK: test9b:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define double @doubleSel(i1 %A, double %B, double %C) nounwind {
- %X = select i1 %A, double %B, double %C ; <double> [#uses=1]
- ret double %X
+;; Select between -1 and 1.
+define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 1
+ ret i64 %cond
+; CHECK: test10:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq $1, %rax
+; CHECK: ret
}
-define i8 @foldSel(i1 %A, i8 %B, i8 %C) nounwind {
- %Cond = icmp slt i8 %B, %C ; <i1> [#uses=1]
- %X = select i1 %Cond, i8 %B, i8 %C ; <i8> [#uses=1]
- ret i8 %X
+
+
+define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 %y, i64 -1
+ ret i64 %cond
+; CHECK: test11:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: notq %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define i32 @foldSel2(i1 %A, i32 %B, i32 %C) nounwind {
- %Cond = icmp eq i32 %B, %C ; <i1> [#uses=1]
- %X = select i1 %Cond, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X
+define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp ne i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 %y
+ ret i64 %cond
+; CHECK: test11a:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: notq %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define i32 @foldSel2a(i1 %A, i32 %B, i32 %C, double %X, double %Y) nounwind {
- %Cond = fcmp olt double %X, %Y ; <i1> [#uses=1]
- %X.upgrd.1 = select i1 %Cond, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X.upgrd.1
+
+declare noalias i8* @_Znam(i64) noredzone
+
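+; Overflow-checked operator new: if the multiply overflows, the requested size
+; becomes -1 so the allocation fails instead of being silently truncated; this
+; should be done with cmovno rather than a branch.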
+define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
+entry:
+ %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
+ %B = extractvalue { i64, i1 } %A, 1
+ %C = extractvalue { i64, i1 } %A, 0
+ %D = select i1 %B, i64 -1, i64 %C
+ %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
+ ret i8* %call
+; CHECK: test12:
+; CHECK: mulq
+; CHECK: movq $-1, %rdi
+; CHECK: cmovnoq %rax, %rdi
+; CHECK: jmp __Znam
}
-define float @foldSel3(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
- %Cond = icmp ult i32 %X, %Y ; <i1> [#uses=1]
- %X.upgrd.2 = select i1 %Cond, float %B, float %C ; <float> [#uses=1]
- ret float %X.upgrd.2
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+
+define i32 @test13(i32 %a, i32 %b) nounwind {
+ %c = icmp ult i32 %a, %b
+ %d = sext i1 %c to i32
+ ret i32 %d
+; CHECK: test13:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: ret
}
-define float @nofoldSel4(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
- %Cond = icmp slt i32 %X, %Y ; <i1> [#uses=1]
- %X.upgrd.3 = select i1 %Cond, float %B, float %C ; <float> [#uses=1]
- ret float %X.upgrd.3
+define i32 @test14(i32 %a, i32 %b) nounwind {
+ %c = icmp uge i32 %a, %b
+ %d = sext i1 %c to i32
+ ret i32 %d
+; CHECK: test14:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: notl
+; CHECK-NEXT: ret
}
+
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
deleted file mode 100644
index 4aca040..0000000
--- a/test/CodeGen/X86/sext-select.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep movsw
-; PR2139
-
-declare void @abort()
-
-define i32 @main() {
-entry:
- %tmp73 = tail call i1 @return_false() ; <i8> [#uses=1]
- %g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2]
- %tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1]
- %tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2]
- %tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1]
- br i1 %tmp87, label %bb90, label %bb91
-bb90: ; preds = %bb84, %bb72
- tail call void @abort()
- unreachable
-bb91: ; preds = %bb84
- ret i32 0
-}
-
-define i1 @return_false() {
- ret i1 0
-}
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 48ca36c..d9c3061 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,21 +1,21 @@
; RUN: llc < %s -march=x86 | \
; RUN: grep {s\[ah\]\[rl\]l} | count 1
-define i32* @test1(i32* %P, i32 %X) {
+define i32* @test1(i32* %P, i32 %X) nounwind {
%Y = lshr i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test2(i32* %P, i32 %X) {
+define i32* @test2(i32* %P, i32 %X) nounwind {
%Y = shl i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.2 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.2 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test3(i32* %P, i32 %X) {
+define i32* @test3(i32* %P, i32 %X) nounwind {
%Y = ashr i32 %X, 2 ; <i32> [#uses=1]
%P2 = getelementptr i32* %P, i32 %Y ; <i32*> [#uses=1]
ret i32* %P2
diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll
index f0d66cf..f97abe00 100644
--- a/test/CodeGen/X86/sibcall-3.ll
+++ b/test/CodeGen/X86/sibcall-3.ll
@@ -3,7 +3,7 @@
define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
; CHECK: t1:
-; CHECK: call 0
+; CHECK: calll 0
tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind
ret void
}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
new file mode 100644
index 0000000..9d74121
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+; Sibcall optimization of expanded libcalls.
+; rdar://8707777
+
+define double @foo(double %a) nounwind readonly ssp {
+entry:
+; X32: foo:
+; X32: jmp _sin$stub
+
+; X64: foo:
+; X64: jmp _sin
+ %0 = tail call double @sin(double %a) nounwind readonly
+ ret double %0
+}
+
+define float @bar(float %a) nounwind readonly ssp {
+; X32: bar:
+; X32: jmp _sinf$stub
+
+; X64: bar:
+; X64: jmp _sinf
+entry:
+ %0 = tail call float @sinf(float %a) nounwind readonly
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index a3c9957..de2a81e8 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
-; Darwin 8 generates stubs, which don't match
-; XFAIL: apple-darwin8
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
define void @t1(i32 %x) nounwind ssp {
entry:
@@ -45,7 +43,7 @@ declare i32 @foo3()
define void @t4(void (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t4:
-; 32: call *
+; 32: calll *
; FIXME: gcc can generate a tailcall for this. But it's tricky.
; 64: t4:
@@ -71,7 +69,7 @@ entry:
define i32 @t6(i32 %x) nounwind ssp {
entry:
; 32: t6:
-; 32: call {{_?}}t6
+; 32: calll {{_?}}t6
; 32: jmp {{_?}}bar
; 64: t6:
@@ -108,7 +106,7 @@ declare i32 @bar2(i32, i32, i32)
define signext i16 @t8() nounwind ssp {
entry:
; 32: t8:
-; 32: call {{_?}}bar3
+; 32: calll {{_?}}bar3
; 64: t8:
; 64: callq {{_?}}bar3
@@ -121,7 +119,7 @@ declare signext i16 @bar3()
define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t9:
-; 32: call *
+; 32: calll *
; 64: t9:
; 64: callq *
@@ -133,7 +131,7 @@ entry:
define void @t10() nounwind ssp {
entry:
; 32: t10:
-; 32: call
+; 32: calll
; 64: t10:
; 64: callq
@@ -205,12 +203,12 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
; 32: t13:
; 32-NOT: jmp
-; 32: call
+; 32: calll
; 32: ret
; 64: t13:
; 64-NOT: jmp
-; 64: call
+; 64: callq
; 64: ret
entry:
%0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
@@ -248,7 +246,7 @@ entry:
define void @t15(%struct.foo* noalias sret %agg.result) nounwind {
; 32: t15:
-; 32: call {{_?}}f
+; 32: calll {{_?}}f
; 32: ret $4
; 64: t15:
@@ -263,7 +261,7 @@ declare void @f(%struct.foo* noalias sret) nounwind
define void @t16() nounwind ssp {
entry:
; 32: t16:
-; 32: call {{_?}}bar4
+; 32: calll {{_?}}bar4
; 32: fstp
; 64: t16:
@@ -293,7 +291,7 @@ declare void @bar5(...)
define void @t18() nounwind ssp {
entry:
; 32: t18:
-; 32: call {{_?}}bar6
+; 32: calll {{_?}}bar6
; 32: fstp %st(0)
; 64: t18:
@@ -309,7 +307,7 @@ define void @t19() alignstack(32) nounwind {
entry:
; CHECK: t19:
; CHECK: andl $-32
-; CHECK: call {{_?}}foo
+; CHECK: calll {{_?}}foo
tail call void @foo() nounwind
ret void
}
@@ -323,7 +321,7 @@ declare void @foo()
define double @t20(double %x) nounwind {
entry:
; 32: t20:
-; 32: call {{_?}}foo20
+; 32: calll {{_?}}foo20
; 32: fldl (%esp)
; 64: t20:
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index acba528..31f41ee 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -6,10 +6,11 @@
; that it's conditionally evaluated.
; CHECK: foo:
-; CHECK: divsd
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne
+; CHECK-NEXT: je
; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: divsd
define double @foo(double %x, double %y, i1 %c) nounwind {
%a = fdiv double %x, 3.2
@@ -18,6 +19,24 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
ret double %z
}
+; Make sure the critical edge is broken so the divsd is sunk below
+; the conditional branch.
+; rdar://8454886
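+; (With the edge split, the expensive divsd executes only on the path where %c
+; selects %a, instead of unconditionally ahead of the branch.)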
+
+; CHECK: split:
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je
+; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: movaps
+; CHECK-NEXT: ret
+define double @split(double %x, double %y, i1 %c) nounwind {
+ %a = fdiv double %x, 3.2
+ %z = select i1 %c, double %a, double %y
+ ret double %z
+}
+
+
; Hoist floating-point constant-pool loads out of loops.
; CHECK: bar:
@@ -68,9 +87,9 @@ return:
; Codegen should hoist and CSE these constants.
; CHECK: vv:
-; CHECK: LCPI2_0(%rip), %xmm0
-; CHECK: LCPI2_1(%rip), %xmm1
-; CHECK: LCPI2_2(%rip), %xmm2
+; CHECK: LCPI3_0(%rip), %xmm0
+; CHECK: LCPI3_1(%rip), %xmm1
+; CHECK: LCPI3_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
deleted file mode 100644
index 07d4d52..0000000
--- a/test/CodeGen/X86/split-select.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86-64 | grep test | count 1
-
-define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
- %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
- store <2 x i16> %x, <2 x i16>* %p
- ret void
-}
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index 3cc83ca..9f5d4b4 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movaps
+; PR8969 - make 32-bit Linux use a 16-byte aligned stack.
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 6fc0190..5c3e32f 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -192,3 +192,33 @@ entry:
; CHECK: test15:
; CHECK: movhlps %xmm1, %xmm0
}
+
+; PR8900
+; CHECK: test16:
+; CHECK: unpcklpd
+; CHECK: ret
+
+define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
+ %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
+ %i6 = load <4 x double>* %i5, align 32
+ %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %i7
+}
+
+; PR9009
+define fastcc void @test17() nounwind {
+entry:
+ %0 = insertelement <4 x i32> undef, i32 undef, i32 1
+ %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %2 = bitcast <4 x i32> %1 to <4 x float>
+ store <4 x float> %2, <4 x float> * undef
+ ret void
+}
+
+; PR9210
+define <4 x float> @f(<4 x double>) nounwind {
+entry:
+ %double2float.i = fptrunc <4 x double> %0 to <4 x float>
+ ret <4 x float> %double2float.i
+}
+
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 206cdff..9a60091 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -169,7 +169,7 @@ define internal void @t10() nounwind {
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
-; X64: movlhps %xmm1, %xmm1
+; X64: unpcklpd %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
; X64: pextrw $6, %xmm0, %eax
@@ -260,3 +260,18 @@ entry:
; X64: pinsrw $1, %eax, %xmm0
; X64: ret
}
+
+; rdar://8520311
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64: movddup (%rax), %xmm0
+ %tmp1 = load <4 x float>* undef, align 16
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %tmp3 = load <4 x float>* undef, align 16
+ %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+ %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+ ret <4 x i32> %tmp7
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 3a14fa2..2ac4cb4 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
ret i32 %tmp1
; X32: _ptestz_2:
; X32: ptest %xmm1, %xmm0
-; X32: setb %al
+; X32: sbbl %eax
; X64: _ptestz_2:
; X64: ptest %xmm1, %xmm0
-; X64: setb %al
+; X64: sbbl %eax
}
define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 8ca0b12..793c026 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i686-apple-darwin8"
@G = external global double
-define void @test({ double, double }* byval %z, double* %P) {
+define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
@@ -21,14 +21,14 @@ entry:
ret void
}
-define void @test2() alignstack(16) {
+define void @test2() alignstack(16) nounwind {
entry:
; CHECK: andl{{.*}}$-16, %esp
ret void
}
; Use a call to force a spill.
-define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) {
+define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
entry:
; CHECK: andl{{.*}}$-32, %esp
call void @test2()
@@ -38,3 +38,14 @@ entry:
declare double @fabs(double)
+; The pointer is already known to be 16-byte aligned, so the 'and x, -16' is eliminable.
+define i32 @test4() nounwind {
+entry:
+ %buffer = alloca [2048 x i8], align 16
+ %0 = ptrtoint [2048 x i8]* %buffer to i32
+ %and = and i32 %0, -16
+ ret i32 %and
+; CHECK: test4:
+; CHECK-NOT: and
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/stdcall-notailcall.ll b/test/CodeGen/X86/stdcall-notailcall.ll
new file mode 100644
index 0000000..8e33c30
--- /dev/null
+++ b/test/CodeGen/X86/stdcall-notailcall.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=i386-apple-darwin11 -O2 < %s | FileCheck %s
+
+%struct.I = type { i32 (...)** }
+define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 {
+; CHECK: bar:
+; CHECK-NOT: jmp
+; CHECK: ret $4
+entry:
+ tail call void @foo()
+ ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index abc5174..0dd228e 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -152,3 +152,17 @@ define void @test9() nounwind {
store i32 %or, i32* @g_16
ret void
}
+
+; rdar://8494845 + PR8244
+; X64: test10:
+; X64-NEXT: movsbl (%rdi), %eax
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: ret
+define i8 @test10(i8* %P) nounwind ssp {
+entry:
+ %tmp = load i8* %P, align 1
+ %conv = sext i8 %tmp to i32
+ %shr3 = lshr i32 %conv, 8
+ %conv2 = trunc i32 %shr3 to i8
+ ret i8 %conv2
+}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 46e59e9..1168622 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
target datalayout = "e-p:32:32"
%struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index ed3266e..9f491d4 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -49,3 +49,33 @@ sw.epilog: ; preds = %sw.default, %sw.bb4
}
declare void @foo(i32)
+
+; Don't zero-extend the switch test operands to pointer type if it can be avoided.
+; rdar://8781238
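+; (The case values 0, 1, 3, 4 and 6 form the 32-bit mask 91 = 0b1011011, so
+; after the cmpl $6 range check a single btl against that immediate decides the
+; branch without widening %x to pointer width.)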
+define void @test2(i32 %x) nounwind ssp {
+; CHECK: test2:
+; CHECK: cmpl $6
+; CHECK: ja
+
+; CHECK-NEXT: movl $91
+; CHECK-NOT: movl
+; CHECK-NEXT: btl
+; CHECK-NEXT: jb
+entry:
+ switch i32 %x, label %if.end [
+ i32 6, label %if.then
+ i32 4, label %if.then
+ i32 3, label %if.then
+ i32 1, label %if.then
+ i32 0, label %if.then
+ ]
+
+if.then: ; preds = %entry, %entry, %entry, %entry, %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
new file mode 100644
index 0000000..75832c7
--- /dev/null
+++ b/test/CodeGen/X86/switch-or.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s
+
+; Check that merging switch cases that differ in one bit works.
+; CHECK: orl $2
+; CHECK-NEXT: cmpl $6
+
+define void @foo(i32 %variable) nounwind {
+entry:
+ switch i32 %variable, label %if.end [
+ i32 4, label %if.then
+ i32 6, label %if.then
+ ]
+
+if.then:
+ %call = tail call i32 (...)* @bar() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 9662ad6..9291695 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -62,11 +62,11 @@ declare i8* @choose(i8*, i8*)
; CHECK: tail_duplicate_me:
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
define void @tail_duplicate_me() nounwind {
entry:
@@ -153,19 +153,16 @@ bb30:
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
-; CHECK: jmp .LBB3_7
-; CHECK-NEXT: .LBB3_12:
+; CHECK: jmp .LBB3_11
+; CHECK-NEXT: .LBB3_9:
; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: xorb %dl, %dl
; CHECK-NEXT: movb 16(%rax), %al
; CHECK-NEXT: cmpb $16, %al
-; CHECK-NEXT: je .LBB3_6
+; CHECK-NEXT: je .LBB3_11
; CHECK-NEXT: cmpb $23, %al
-; CHECK-NEXT: je .LBB3_6
-; CHECK-NEXT: jmp .LBB3_15
-; CHECK-NEXT: .LBB3_14:
-; CHECK-NEXT: cmpb $23, %bl
-; CHECK-NEXT: jne .LBB3_15
-; CHECK-NEXT: .LBB3_15:
+; CHECK-NEXT: jne .LBB3_14
+; CHECK-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
@@ -276,7 +273,7 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
; CHECK: foo:
; CHECK: callq func
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq
; CHECK-NEXT: ret
define void @foo(i1* %V) nounwind {
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c7070f2..c3f4278 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -17,7 +17,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; Adjust the stack to enter the function. (The amount of the
; adjustment may change in the future, in which case the location of
; the stack argument and the return adjustment will change too.)
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; Put the call target into R11, which won't be clobbered while restoring
; callee-saved registers and won't be used for passing arguments.
; CHECK: movq %rdi, %rax
@@ -31,7 +31,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; CHECK: movl $5, %r8d
; CHECK: movl $6, %r9d
; Adjust the stack to "return".
-; CHECK: addq $8, %rsp
+; CHECK: popq
; And tail-call to the target.
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
@@ -46,7 +46,7 @@ define fastcc i32 @direct_manyargs() {
; Adjust the stack to enter the function. (The amount of the
; adjustment may change in the future, in which case the location of
; the stack argument and the return adjustment will change too.)
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; Pass the stack argument.
; CHECK: movl $7, 16(%rsp)
; Pass the register arguments, in the right registers.
@@ -62,7 +62,7 @@ define fastcc i32 @direct_manyargs() {
; arguments.
; CHECK: movabsq $manyargs_callee, %rax
; Adjust the stack to "return".
-; CHECK: addq $8, %rsp
+; CHECK: popq
; And tail-call to the target.
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
new file mode 100644
index 0000000..914d8f7
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; PR8743
+; TAILJMPri64 should not receive "callee-saved" registers beyond epilogue.
+
+; AMD64: jmpq
+; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}}
+
+; WIN64: jmpq
+; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}}
+
+%class = type { [8 x i8] }
+%vt = type { i32 (...)** }
+
+define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
+%this, %vt* %Ty) align 2 {
+entry:
+ %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
+ %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+ %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
+ %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+ %call = tail call %vt* %1(%vt* %Ty, %class* %this)
+ ret %vt* %call
+}
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index 0233139..d3f811c 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -19,5 +19,5 @@ define i32 @main(i32 %argc, i8** %argv) {
ret i32 0
}
-; CHECK: call tailcaller
+; CHECK: calll tailcaller
; CHECK-NEXT: subl $12
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 4ec127f..04c4e95 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
declare i32 @putchar(i32)
define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+; CHECK: checktail:
%tmp1 = icmp sgt i32 %x, 0
br i1 %tmp1, label %if-then, label %if-else
@@ -10,6 +11,7 @@ if-then:
%fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)*
%arg1 = add i32 %x, -1
call i32 @putchar(i32 90)
+; CHECK: jmpl *%e{{.*}}
%res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
ret i32 %res
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 107bdf9..0c732d5 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,16 +1,20 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq ${{24|88}}, %rsp
; Check that lowered arguments on the stack do not overwrite each other.
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl 32(%rsp), %eax
+; CHECK: movl [[A1:32|144]](%rsp), %eax
; Move param %in1 to temp register (%r10d).
-; CHECK: movl 40(%rsp), %r10d
+; CHECK: movl [[A2:40|152]](%rsp), %r10d
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl %edi, %eax
+; CHECK: addl {{%edi|%ecx}}, %eax
; Move param %in2 to stack.
-; CHECK: movl %r10d, 32(%rsp)
+; CHECK: movl %r10d, [[A1]](%rsp)
; Move result of addition to stack.
-; CHECK: movl %eax, 40(%rsp)
+; CHECK: movl %eax, [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL
@@ -22,4 +26,3 @@ entry:
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
ret i32 %retval
}
-
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index 4cad837..b83416d 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -11,11 +11,11 @@ entry:
; X32: f1:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f1:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
@i2 = external thread_local global i32
@@ -27,11 +27,11 @@ entry:
; X32: f2:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f2:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
@@ -43,11 +43,11 @@ entry:
; X32: f3:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f3:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
define i32* @f4() nounwind {
@@ -57,11 +57,11 @@ entry:
; X32: f4:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f4:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index 214146f..7d08df8 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -5,7 +5,7 @@
@i = external hidden thread_local global i32
-define i32 @f() {
+define i32 @f() nounwind {
entry:
%tmp1 = load i32* @i
ret i32 %tmp1
diff --git a/test/CodeGen/X86/tls-1.ll b/test/CodeGen/X86/tlv-1.ll
index de694d8..42940f1 100644
--- a/test/CodeGen/X86/tls-1.ll
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -1,5 +1,21 @@
; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+%struct.A = type { [48 x i8], i32, i32, i32 }
+
+@c = external thread_local global %struct.A, align 4
+
+define void @main() nounwind ssp {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
+ unreachable
+ ; CHECK: movq _c@TLVP(%rip), %rdi
+ ; CHECK-NEXT: callq *(%rdi)
+ ; CHECK-NEXT: movl $0, 56(%rax)
+ ; CHECK-NEXT: movq $0, 48(%rax)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
@a = thread_local global i32 0 ; <i32*> [#uses=0]
@b = thread_local global i32 0 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/X86/tlv-2.ll b/test/CodeGen/X86/tlv-2.ll
new file mode 100644
index 0000000..5f29a60
--- /dev/null
+++ b/test/CodeGen/X86/tlv-2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin -O0 | FileCheck %s
+
+@b = thread_local global i32 5, align 4
+@a = thread_local global i32 0, align 4
+@c = internal thread_local global i32 0, align 4
+@d = internal thread_local global i32 5, align 4
+
+define void @foo() nounwind ssp {
+entry:
+ store i32 1, i32* @a, align 4
+ ; CHECK: movq _a@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $1, (%rax)
+
+ store i32 2, i32* @b, align 4
+ ; CHECK: movq _b@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $2, (%rax)
+
+ store i32 3, i32* @c, align 4
+ ; CHECK: movq _c@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $3, (%rax)
+
+ store i32 4, i32* @d, align 4
+ ; CHECK: movq _d@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $4, (%rax)
+ ; CHECK: popq
+
+ ret void
+}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a245ed7..ec16dfe 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,20 +5,32 @@
;; allocator turns the shift into an LEA. This also occurs for ADD.
; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov E.X, E.X}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-@G = external global i32 ; <i32*> [#uses=3]
+@G = external global i32
-define i32 @test1(i32 %X, i32 %Y) {
- %Z = add i32 %X, %Y ; <i32> [#uses=1]
- volatile store i32 %Y, i32* @G
+define i32 @test1(i32 %X) nounwind {
+; CHECK: test1:
+; CHECK-NOT: mov
+; CHECK: leal 1(%rdi)
+ %Z = add i32 %X, 1
volatile store i32 %Z, i32* @G
ret i32 %X
}
-define i32 @test2(i32 %X) {
- %Z = add i32 %X, 1 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
- ret i32 %X
+; rdar://8977508
+; The second add should not be transformed to leal nor should it be
+; commuted (which would require inserting a copy).
+define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: leal
+; CHECK-NOT: leal
+; CHECK-NOT: mov
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+ %add = add i32 %b, %a
+ %add3 = add i32 %add, %c
+ %add5 = add i32 %add3, %d
+ ret i32 %add5
}
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
new file mode 100644
index 0000000..d9f753c
--- /dev/null
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
+; by the compiler_rt implementation of __floatundisf.
+; <rdar://problem/8493982>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: jns LBB0_2
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
+; CHECK: LBB0_2
+; CHECK-NEXT: cvtsi2ss
+define float @test(i64 %a) {
+entry:
+ %b = uitofp i64 %a to float
+ ret float %b
+}
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index d522bd8..c997661 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | FileCheck %s
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
+
+; CHECK: a:
+; CHECK: mull
+; CHECK: seto %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
}
diff --git a/test/CodeGen/X86/umulo-64.ll b/test/CodeGen/X86/umulo-64.ll
new file mode 100644
index 0000000..280bd9c
--- /dev/null
+++ b/test/CodeGen/X86/umulo-64.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+%0 = type { i64, i1 }
+
+define i32 @f0(i64 %a, i64 %b) nounwind ssp {
+ %1 = alloca i64, align 4
+ %2 = alloca i64, align 4
+ store i64 %a, i64* %1, align 8
+ store i64 %b, i64* %2, align 8
+ %3 = load i64* %1, align 8
+ %4 = load i64* %2, align 8
+ %5 = call %0 @llvm.smul.with.overflow.i64(i64 %3, i64 %4)
+ %6 = extractvalue %0 %5, 0
+ %7 = extractvalue %0 %5, 1
+ br i1 %7, label %8, label %9
+
+; <label>:8 ; preds = %0
+ call void @llvm.trap()
+ unreachable
+
+; <label>:9 ; preds = %0
+ %10 = trunc i64 %6 to i32
+ ret i32 %10
+}
+
+declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index a99af06..6a493c0 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -13,7 +13,7 @@ entry:
bb:
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: call {{_?}}memcpy
+; I386: calll {{_?}}memcpy
; CORE2: movabsq
; CORE2: movabsq
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index fa98b78..09431b5 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -asm-verbose=false -march=x86-64 -use-unknown-locations | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-apple-darwin10 -use-unknown-locations | FileCheck %s
; The divide instruction does not have a debug location. CodeGen should
-; represent this in the debug information. This is checked by a check
-; for a label between the code for the add and the code for the divide,
-; which indicates that the add's location doesn't spill over unto the
-; divide.
+; represent this in the debug information. This is done by setting line
+; and column to 0.
; CHECK: leal (%rdi,%rsi), %eax
+; CHECK-NEXT: .loc 1 0 0
; CHECK-NEXT: Ltmp
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r8d
+; CHECK-NEXT: .loc 1 4 3
; CHECK-NEXT: Ltmp
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/vec-sign.ll b/test/CodeGen/X86/vec-sign.ll
new file mode 100644
index 0000000..31b9c2e
--- /dev/null
+++ b/test/CodeGen/X86/vec-sign.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -mcpu=nehalem | FileCheck %s
+
+define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %a, %0
+ %2 = and <4 x i32> %b.lobit, %sub
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
+
+define <4 x i32> @blendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %c, %0
+ %2 = and <4 x i32> %a, %b.lobit
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
index 2f57d7b..4d665f1 100644
--- a/test/CodeGen/X86/vec-trunc-store.ll
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -disable-mmx >/dev/null
+; RUN: llc < %s -march=x86-64
define void @foo(<8 x i32>* %p) nounwind {
%t = load <8 x i32>* %p
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index f853164..95289c9 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=core2
-; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx
define <8 x i32> @a(<8 x i16> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 091641b..04bb725 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
index 8d2a3c3..02b16a7 100644
--- a/test/CodeGen/X86/vec_ext_inreg.ll
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86-64
-; RUN: llc < %s -march=x86-64 -disable-mmx
define <8 x i32> @a(<8 x i32> %a) nounwind {
%b = trunc <8 x i32> %a to <8 x i16>
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 291fc04..471cc16 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,15 +1,16 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep psllq %t | grep 32
+; RUN: grep shll %t | grep 12
; RUN: grep pslldq %t | grep 12
; RUN: grep psrldq %t | grep 8
; RUN: grep psrldq %t | grep 12
+; There are no MMX operations in @t1
-define void @t1(i32 %a, <1 x i64>* %P) nounwind {
+define void @t1(i32 %a, x86_mmx* %P) nounwind {
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
%tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
- %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
- store <1 x i64> %tmp23, <1 x i64>* %P
+ %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp23, x86_mmx* %P
ret void
}
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 9ede10f..268b5c4 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,8 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s
+; MMX insertelement is not available; these are promoted to XMM.
+; (Without SSE they are split to two ints, and the code is much better.)
-define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
+define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
entry:
- %tmp3 = insertelement <2 x i32> %x, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
+; CHECK: mmx_movzl
+; CHECK: pinsrd
+; CHECK: pinsrd
+ %tmp = bitcast x86_mmx %x to <2 x i32>
+ %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
%tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp8
+ %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
+ ret x86_mmx %tmp9
}
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
deleted file mode 100644
index 033e9f7..0000000
--- a/test/CodeGen/X86/vec_select.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse
-
-define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
- %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %iftmp.38.0, <4 x float>* %A
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index 4f0acb2..6dd3cb0 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movq
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep mov | count 3
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index d700ccb..dec98c7 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
-; RUN: grep addps %t | count 2
-; RUN: grep mulps %t | count 2
-; RUN: grep subps %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s
; ModuleID = 'vec_shuffle-27.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -9,9 +6,33 @@ target triple = "i686-apple-cl.1.0"
define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
entry:
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
%sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
%mul = fmul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1]
%add = fadd <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1]
ret <8 x float> %add
}
+
+; Test case for r122206
+define void @test2(<4 x i64>* %ap, <4 x i64>* %bp) nounwind {
+entry:
+; CHECK: movdqa
+ %a = load <4 x i64> * %ap
+ %b = load <4 x i64> * %bp
+ %mulaa = mul <4 x i64> %a, %a
+ %mulbb = mul <4 x i64> %b, %b
+ %mulab = mul <4 x i64> %a, %b
+ %vect1271 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+ %vect1272 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+ %vect1487 = shufflevector <4 x i64> %vect1271, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ %vect1488 = shufflevector <4 x i64> %vect1272, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ store <4 x i64> %vect1487, <4 x i64>* %ap
+ store <4 x i64> %vect1488, <4 x i64>* %bp
+ ret void;
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 3f69150..1651c4c 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep pshufhw %t | grep -- -95 | count 1
; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 1ed858d..b090930 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
entry:
@@ -12,3 +13,12 @@ entry:
ret <4 x i32> %2
}
+define void @t01(double* %a0) nounwind ssp {
+entry:
+; CHECK_O0: movsd (%eax), %xmm0
+; CHECK_O0: unpcklpd %xmm0, %xmm0
+ %tmp93 = load double* %a0, align 8
+ %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
+ store <2 x double> %vecinit94, <2 x double>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 3b15d4c..8aa5094 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; 64-bit stores here do not use MMX.
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/visibility.ll b/test/CodeGen/X86/visibility.ll
new file mode 100644
index 0000000..a8d2870
--- /dev/null
+++ b/test/CodeGen/X86/visibility.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define hidden void @foo() nounwind {
+entry:
+ call void @bar()
+ ret void
+}
+
+declare hidden void @bar()
+
+;CHECK: .hidden bar
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index ae845e0..4955156 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 36feb11..9a9b419 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 20d3f48..8e8a9aa 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 9773cbe..8e24fda 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index a543f38..cb254ae 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; When loading the shift amount from memory, avoid generating the splat.
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index a247c6e..97dacfd 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 | FileCheck %s
define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index f8d0690..4b8016d 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; Widen a v3i8 to v16i8 to use a vector add
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index fdecaa3..03b3fea 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: paddb
; CHECK: pand
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 1f2c250..0574923 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
; CHECK: paddw
; CHECK: pextrw
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index f7506ae..5931d63 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; CHECK: psubw
; CHECK-NEXT: pmullw
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index bae5c54..7f2eff0 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; CHECK: movdqa
; CHECK: pmulld
; CHECK: psubd
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 538123f..b983d14 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: mulps
; CHECK: addps
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d4ab174..1eace9e 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s -disable-mmx | FileCheck %s
+; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
; CHECK: paddw
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 14e8f75..5c695ea 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: pextrd
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 02674dd..87486d9 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: paddd
; CHECK: pextrd
; CHECK: pextrd
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 5f31e56..8e1adf5 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: sarb
; CHECK: sarb
; CHECK: sarb
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index d1d7fec..136578d 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: movl
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 08759bf..3903234 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
; CHECK: movd
; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a2029dd..f6810cd 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: pshufd
; CHECK: paddd
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index b24a9b3..969cb51 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: movswl
; CHECK: movswl
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 1a40800..a25fae9 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: cvtsi2ss
; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index e505b62..80f3a49 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: cvtsi2ss
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 308e6b8..4bcac58 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; widen extract subvector
define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index d397645..639617f 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; PR4891
; This load should be before the call, not after.
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 551704c..6422063 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
; Test based on pr5626 to load/store
;
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
deleted file mode 100644
index d9de892..0000000
--- a/test/CodeGen/X86/widen_select-1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
-
-; widening select v6i32 and then a sub
-
-define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
-entry:
- %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
- %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- store <6 x i32> %val, <6 x i32>* %dst.addr
- ret void
-}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 463f522..034c42c 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; widening shuffle v3float and then a add
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
new file mode 100644
index 0000000..f9d4bf9
--- /dev/null
+++ b/test/CodeGen/X86/win64_params.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the 5th and 6th parameters are coming from the correct location
+; on the stack.
+define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: movl 48(%rsp), %eax
+; CHECK: addl 40(%rsp), %eax
+ %add = add nsw i32 %p6, %p5
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
new file mode 100644
index 0000000..a451318
--- /dev/null
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the var arg parameters which are passed in registers are stored
+; in home stack slots allocated by the caller and that AP is correctly
+; calculated.
+define void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: pushq
+; CHECK: movq %r9, 40(%rsp)
+; CHECK: movq %r8, 32(%rsp)
+; CHECK: movq %rdx, 24(%rsp)
+; CHECK: leaq 24(%rsp), %rax
+
+ %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
+ %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
+ call void @llvm.va_start(i8* %ap1)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index 27d3358..82ce81d 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
@@ -12,10 +13,10 @@
; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI.
define i32 @main4k() nounwind {
entry:
-; WIN_X32: call __chkstk
-; WIN_X64: call __chkstk
-; MINGW_X32: call __alloca
-; MINGW_X64: call _alloca
+; WIN_X32: calll __chkstk
+; WIN_X64: callq __chkstk
+; MINGW_X32: calll __alloca
+; MINGW_X64: callq __chkstk
; LINUX-NOT: call __chkstk
%array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
ret i32 0
@@ -26,15 +27,15 @@ entry:
define i32 @main128() nounwind {
entry:
; WIN_X32: # BB#0:
-; WIN_X32-NOT: call __chkstk
+; WIN_X32-NOT: calll __chkstk
; WIN_X32: ret
; WIN_X64: # BB#0:
-; WIN_X64-NOT: call __chkstk
+; WIN_X64-NOT: callq __chkstk
; WIN_X64: ret
; MINGW_X64: # BB#0:
-; MINGW_X64-NOT: call _alloca
+; MINGW_X64-NOT: callq _alloca
; MINGW_X64: ret
; LINUX: # BB#0:
diff --git a/test/CodeGen/X86/x86-64-extend-shift.ll b/test/CodeGen/X86/x86-64-extend-shift.ll
new file mode 100644
index 0000000..6852785
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts.
+
+define i64 @baz(i32 %A) nounwind {
+; CHECK: shlq $49, %rax
+ %tmp1 = shl i32 %A, 17
+ %tmp2 = zext i32 %tmp1 to i64
+ %tmp3 = shl i64 %tmp2, 32
+ ret i64 %tmp3
+}
diff --git a/test/CodeGen/X86/x86_64-mul-by-const.ll b/test/CodeGen/X86/x86_64-mul-by-const.ll
new file mode 100644
index 0000000..df48a29
--- /dev/null
+++ b/test/CodeGen/X86/x86_64-mul-by-const.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts. rdar://8771012.
+
+define i32 @f9188_mul365384439_shift27(i32 %A) nounwind {
+; CHECK: imulq $365384439,
+; CHECK: shrq $59, %rax
+ %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
new file mode 100644
index 0000000..e61e880
--- /dev/null
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define void @t() nounwind ssp {
+; CHECK: t:
+entry:
+ br i1 undef, label %return, label %if.end.i
+
+if.end.i: ; preds = %entry
+ %tmp7.i = load i32* undef, align 4, !tbaa !0
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %if.end.i
+; CHECK: %if.end
+; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
+; CHECK-NOT: movl [[REG]], [[REG]]
+; CHECK-NEXT: xorb
+ %tmp138 = select i1 undef, i32 0, i32 %tmp7.i
+ %tmp867 = zext i32 %tmp138 to i64
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %if.end
+ %tmp869 = sub i64 %tmp867, 0
+ %scale2.0 = trunc i64 %tmp869 to i32
+ %cmp149 = icmp eq i32 %scale2.0, 0
+ br i1 %cmp149, label %while.end, label %land.rhs
+
+land.rhs: ; preds = %while.cond
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %land.rhs
+ br label %while.cond
+
+while.end: ; preds = %land.rhs, %while.cond
+ br i1 undef, label %cond.false205, label %cond.true190
+
+cond.true190: ; preds = %while.end
+ br i1 undef, label %cond.false242, label %cond.true225
+
+cond.false205: ; preds = %while.end
+ unreachable
+
+cond.true225: ; preds = %cond.true190
+ br i1 undef, label %cond.false280, label %cond.true271
+
+cond.false242: ; preds = %cond.true190
+ unreachable
+
+cond.true271: ; preds = %cond.true225
+ unreachable
+
+cond.false280: ; preds = %cond.true225
+ unreachable
+
+return: ; preds = %if.end.i, %entry
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}