Diffstat (limited to 'test/CodeGen/X86')
24 files changed, 941 insertions, 33 deletions
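The recurring fix in the older tests below is the return type of the SSE2 pack intrinsics: packssdw.128 takes two <4 x i32> operands and produces the narrowed <8 x i16>, and packuswb.128 takes two <8 x i16> operands and produces <16 x i8>, so declarations and call sites written with the wider input type as the result were incorrect. A minimal standalone sketch of the corrected shape (illustrative only, not part of the commit; the wrapper function is invented):

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)

; Narrows eight 32-bit lanes into eight 16-bit lanes with signed saturation.
define <8 x i16> @pack_words(<4 x i32> %a, <4 x i32> %b) nounwind {
  %r = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %r
}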
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll index c106f57..3f67097 100644 --- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll +++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll @@ -11,7 +11,7 @@ target triple = "i686-apple-darwin8.6.1" declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) -declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) @@ -33,8 +33,8 @@ cond_false183: ; preds = %cond_false, %entry %tmp337 = bitcast <4 x i32> %tmp336 to <4 x float> ; <<4 x float>> [#uses=1] %tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 ) ; <<4 x float>> [#uses=1] %tmp379 = bitcast <4 x float> %tmp378 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp388 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1] - %tmp392 = bitcast <4 x i32> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1] + %tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp399 = extractelement <8 x i16> %tmp392, i32 7 ; <i16> [#uses=1] %tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7 ; <<8 x i16>> [#uses=1] %tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8> ; <<16 x i8>> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll index 49f3a95..b045329 100644 --- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll +++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll @@ -17,8 +17,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1] %tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp98 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] - %tmp102 = bitcast <4 x i32> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] + %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1] @@ -32,8 +32,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1] %tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp159 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] - %tmp163 = bitcast <4 x i32> %tmp159 to <8 x i16> ; <<8 x i16>> 
[#uses=1] + %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] + %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1] @@ -47,8 +47,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1] %tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp220 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] - %tmp224 = bitcast <4 x i32> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] + %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1] @@ -62,8 +62,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1] %tmp270 = bitcast <4 x float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1] - %tmp281 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] - %tmp285 = bitcast <4 x i32> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] + %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] %tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] %tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] %tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1] @@ -73,4 +73,4 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) -declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll index 989dfc5..b27ef83 100644 --- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll +++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 ; RUN: llc < %s 
-march=x86 -mattr=+sse2 | not grep punpckhwd -declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) @@ -13,8 +13,8 @@ define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32 %tmp805 = add <4 x i32> %tmp777, zeroinitializer %tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16> %tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > ) - %tmp1020 = tail call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 ) - %tmp1030 = bitcast <8 x i16> %tmp1020 to <4 x i32> + %tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 ) + %tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32> %tmp1033 = add <4 x i32> zeroinitializer, %tmp1030 %tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64> %tmp1049 = or <2 x i64> %tmp1048, zeroinitializer diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll index 83eb61a..2aea9c5 100644 --- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll +++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local +; PR5534 %struct.CGPoint = type { double, double } %struct.NSArray = type { %struct.NSObject } diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll new file mode 100644 index 0000000..8c62f4d --- /dev/null +++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Spill" +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Folded Spill" +; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Reload" + + %struct..0anon = type { i32 } + %struct.rtvec_def = type { i32, [1 x %struct..0anon] } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } +@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1] +@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1] + +declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) + +define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { +entry: + %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] + br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next + +cond_next: ; preds = %entry + %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp7 = load i16* %tmp6 ; <i16> [#uses=2] + %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2] + %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1] + br i1 %tmp10, label %cond_true13, label %cond_next32 + +cond_true13: ; preds = %cond_next + %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1] + %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1] + %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp21 = load i16* %tmp20 ; <i16> [#uses=1] + %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1] + br i1 %tmp22, label %cond_true25, label %cond_next32 + +cond_true25: ; preds = %cond_true13 + %tmp29 = tail call 
%struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %tmp29 + +cond_next32: ; preds = %cond_true13, %cond_next + %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1] + %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1] + %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1] + %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1] + %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2] + %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1] + br i1 %tmp12513, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %bb123, %cond_next32 + %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2] + %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5] + %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2] + %tmp43 = load i8* %tmp42 ; <i8> [#uses=1] + switch i8 %tmp43, label %bb123 [ + i8 101, label %cond_true47 + i8 69, label %bb105.preheader + ] + +cond_true47: ; preds = %bb + %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1] + %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1] + %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1] + store i32 %tmp58.c, i32* %tmp62 + %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1] + %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1] + br i1 %tmp6917, label %bb105.preheader, label %bb123 + +bb105.preheader: ; preds = %cond_true47, %bb + %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3] + %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1] + %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1] + br i1 %tmp11625, label %bb123, label %bb73 + +bb73: ; preds = %bb73, %bb105.preheader + %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3] + %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2] + %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1] + %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1] + %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1] + %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1] + store i32 %tmp98.c, i32* %tmp101 + %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2] + %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp115 = load i32* %tmp114 ; <i32> [#uses=1] + %tmp116 = 
icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1] + br i1 %tmp116, label %bb73, label %bb123 + +bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb + %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1] + %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1] + %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp125, label %bb, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry + %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %UnifiedRetVal +} diff --git a/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll new file mode 100644 index 0000000..5398eef --- /dev/null +++ b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll @@ -0,0 +1,133 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim +; rdar://7394770 + +%struct.JVTLib_100487 = type <{ i8 }> + +define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp { +bb.nph: + %0 = shl i32 undef, 1 ; <i32> [#uses=2] + %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1] + br label %bb + +bb: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb.i, label %bb1.i + +bb2: ; preds = %bb + unreachable + +bb.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +bb1.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2] + %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1] + %mask241.masked = or i256 undef, undef ; <i256> [#uses=1] + %ins237 = or i256 undef, 0 ; <i256> [#uses=1] + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb5 + br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +bb12.i: ; preds = %bb9 + br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit: ; preds = %bb12.i, %bb9 + ret i32 undef + +bb10: ; preds = %bb5 + %1 = sext i16 undef to i32 ; <i32> [#uses=1] + %2 = sext i16 undef to i32 ; <i32> [#uses=1] + %3 = sext i16 undef to i32 ; <i32> [#uses=1] + %4 = sext i16 undef to i32 ; <i32> [#uses=1] + %5 = sext i16 undef to i32 ; <i32> [#uses=1] + %6 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0] + %7 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1] + %tmp209 = trunc i256 %tmp208 to i16 ; <i16> [#uses=1] + %8 = sext i16 %tmp209 to i32 ; <i32> [#uses=1] + %9 = sext i16 undef to i32 ; <i32> [#uses=1] + %10 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp193 = lshr i256 %mask241.masked, 208 ; <i256> 
[#uses=1] + %tmp194 = trunc i256 %tmp193 to i16 ; <i16> [#uses=1] + %11 = sext i16 %tmp194 to i32 ; <i32> [#uses=1] + %tmp187 = lshr i256 %ins237, 240 ; <i256> [#uses=1] + %tmp188 = trunc i256 %tmp187 to i16 ; <i16> [#uses=1] + %12 = sext i16 %tmp188 to i32 ; <i32> [#uses=1] + %13 = add nsw i32 %4, %1 ; <i32> [#uses=1] + %14 = add nsw i32 %5, 0 ; <i32> [#uses=1] + %15 = add nsw i32 %6, %2 ; <i32> [#uses=1] + %16 = add nsw i32 %7, %3 ; <i32> [#uses=1] + %17 = add nsw i32 0, %8 ; <i32> [#uses=1] + %18 = add nsw i32 %11, %9 ; <i32> [#uses=1] + %19 = add nsw i32 0, %10 ; <i32> [#uses=1] + %20 = add nsw i32 %12, 0 ; <i32> [#uses=1] + %21 = add nsw i32 %17, %13 ; <i32> [#uses=2] + %22 = add nsw i32 %18, %14 ; <i32> [#uses=2] + %23 = add nsw i32 %19, %15 ; <i32> [#uses=2] + %24 = add nsw i32 %20, %16 ; <i32> [#uses=2] + %25 = add nsw i32 %22, %21 ; <i32> [#uses=2] + %26 = add nsw i32 %24, %23 ; <i32> [#uses=2] + %27 = sub i32 %21, %22 ; <i32> [#uses=1] + %28 = sub i32 %23, %24 ; <i32> [#uses=1] + %29 = add nsw i32 %26, %25 ; <i32> [#uses=1] + %30 = sub i32 %25, %26 ; <i32> [#uses=1] + %31 = sub i32 %27, %28 ; <i32> [#uses=1] + %32 = ashr i32 %29, 1 ; <i32> [#uses=2] + %33 = ashr i32 %30, 1 ; <i32> [#uses=2] + %34 = ashr i32 %31, 1 ; <i32> [#uses=2] + %35 = icmp sgt i32 %32, 32767 ; <i1> [#uses=1] + %o0_0.0.i = select i1 %35, i32 32767, i32 %32 ; <i32> [#uses=2] + %36 = icmp slt i32 %o0_0.0.i, -32768 ; <i1> [#uses=1] + %37 = icmp sgt i32 %33, 32767 ; <i1> [#uses=1] + %o1_0.0.i = select i1 %37, i32 32767, i32 %33 ; <i32> [#uses=2] + %38 = icmp slt i32 %o1_0.0.i, -32768 ; <i1> [#uses=1] + %39 = icmp sgt i32 %34, 32767 ; <i1> [#uses=1] + %o2_0.0.i = select i1 %39, i32 32767, i32 %34 ; <i32> [#uses=2] + %40 = icmp slt i32 %o2_0.0.i, -32768 ; <i1> [#uses=1] + %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1] + %41 = trunc i32 %o0_0.0.i to i16 ; <i16> [#uses=1] + %tmp358 = select i1 %36, i16 -32768, i16 %41 ; <i16> [#uses=2] + %42 = trunc i32 %o1_0.0.i to i16 ; <i16> [#uses=1] + %tmp347 = select i1 %38, i16 -32768, i16 %42 ; <i16> [#uses=1] + %43 = trunc i32 %o2_0.0.i to i16 ; <i16> [#uses=1] + %tmp335 = select i1 %40, i16 -32768, i16 %43 ; <i16> [#uses=1] + %44 = icmp sgt i16 %tmp358, -1 ; <i1> [#uses=2] + %..i24 = select i1 %44, i16 %tmp358, i16 undef ; <i16> [#uses=1] + %45 = icmp sgt i16 %tmp347, -1 ; <i1> [#uses=1] + %46 = icmp sgt i16 %tmp335, -1 ; <i1> [#uses=1] + %47 = zext i16 %..i24 to i32 ; <i32> [#uses=1] + %tmp = trunc i640 %tmp101 to i32 ; <i32> [#uses=1] + %48 = and i32 %tmp, 65535 ; <i32> [#uses=2] + %49 = mul i32 %47, %48 ; <i32> [#uses=1] + %50 = zext i16 undef to i32 ; <i32> [#uses=1] + %51 = mul i32 %50, %48 ; <i32> [#uses=1] + %52 = add i32 %49, %0 ; <i32> [#uses=1] + %53 = add i32 %51, %0 ; <i32> [#uses=1] + %54 = lshr i32 %52, undef ; <i32> [#uses=1] + %55 = lshr i32 %53, undef ; <i32> [#uses=1] + %56 = trunc i32 %54 to i16 ; <i16> [#uses=1] + %57 = trunc i32 %55 to i16 ; <i16> [#uses=1] + %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1] + %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1] + %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1] + br i1 undef, label %bb129.i, label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +bb129.i: ; preds = %bb10 + br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit: ; preds = %bb129.i, %bb10 + %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0] + %59 = phi 
i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0] + store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2 + unreachable +} diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll new file mode 100644 index 0000000..a7c2020 --- /dev/null +++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7395200 + +@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4] + +define void @foo(i32 %n, float* nocapture %x) nounwind ssp { +entry: +; CHECK: foo: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry +; CHECK: movq _g@GOTPCREL(%rip), %rcx + %tmp = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: LBB1_2: + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2] + %tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4] + %tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1] + %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1] + %tmp1117 = or i64 %tmp9, 2 ; <i64> [#uses=1] + %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1] + %tmp1318 = or i64 %tmp9, 3 ; <i64> [#uses=1] + %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1] + %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1] + %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1] + store float %1, float* %x_addr.03, align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1] + store float %2, float* %scevgep, align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1] + store float %3, float* %scevgep12, align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1] + store float %4, float* %scevgep14, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll new file mode 100644 index 0000000..3ce9edb --- /dev/null +++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7396984 + +@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1 + +define void @t(i32 %count) ssp nounwind { +entry: +; CHECK: t: +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) +; CHECK: movups L_str(%rip), %xmm0 + %tmp0 = alloca [60 x i8], align 1 + %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0 + br label %bb1 + +bb1: +; CHECK: LBB1_1: +; CHECK: movaps %xmm0, (%rsp) + %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ] + call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1) + %tmp3 = add i32 %tmp2, 1 + %tmp4 = icmp eq i32 %tmp3, %count + br i1 %tmp4, label %bb2, label %bb1 + +bb2: + ret void +} + +declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll new file mode 100644 index 0000000..5c1a2bc --- /dev/null +++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll @@ 
-0,0 +1,52 @@ +; RUN: llc -O3 < %s +; This test fails with: +; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } +%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } +%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" } +%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } + +define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 { +entry: + br i1 undef, label %bb56, label %bb27.outer + +bb8: ; preds = %bb27.outer108, %bb13 + switch i8 undef, label %bb27.outer [ + i8 35, label %bb56 + i8 37, label %bb14 + i8 38, label %bb10 + i8 42, label %bb56 + ] + +bb27.outer: ; preds = %bb8, %entry + %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2] + br label %bb27.outer108 + +bb10: ; preds = %bb8 + %toBool = icmp eq i8 0, 0 ; <i1> [#uses=1] + %or.cond = and i1 undef, %toBool ; <i1> [#uses=1] + br i1 
%or.cond, label %bb13, label %bb56 + +bb13: ; preds = %bb10 + br i1 undef, label %bb27.outer108, label %bb8 + +bb14: ; preds = %bb8 + ret i8 1 + +bb27.outer108: ; preds = %bb13, %bb27.outer + %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1] + %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0] + br label %bb8 + +bb56: ; preds = %bb10, %bb8, %bb8, %entry + ret i8 1 +} diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll new file mode 100644 index 0000000..633995d --- /dev/null +++ b/test/CodeGen/X86/bigstructret.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: grep "movl .24601, 12(%ecx)" %t +; RUN: grep "movl .48, 8(%ecx)" %t +; RUN: grep "movl .24, 4(%ecx)" %t +; RUN: grep "movl .12, (%ecx)" %t + +%0 = type { i32, i32, i32, i32 } + +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + %3 = insertvalue %0 %2, i32 24601, 3 + ret %0 %3 +} + diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll index de89374..4878448 100644 --- a/test/CodeGen/X86/cmp0.ll +++ b/test/CodeGen/X86/cmp0.ll @@ -1,7 +1,24 @@ -; RUN: llc < %s -march=x86-64 | grep -v cmp +; RUN: llc < %s -march=x86-64 | FileCheck %s -define i64 @foo(i64 %x) { +define i64 @test0(i64 %x) nounwind { %t = icmp eq i64 %x, 0 %r = zext i1 %t to i64 ret i64 %r +; CHECK: test0: +; CHECK: testq %rdi, %rdi +; CHECK: sete %al +; CHECK: movzbl %al, %eax +; CHECK: ret } + +define i64 @test1(i64 %x) nounwind { + %t = icmp slt i64 %x, 1 + %r = zext i1 %t to i64 + ret i64 %r +; CHECK: test1: +; CHECK: testq %rdi, %rdi +; CHECK: setle %al +; CHECK: movzbl %al, %eax +; CHECK: ret +} + diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-5.ll new file mode 100644 index 0000000..88fae37 --- /dev/null +++ b/test/CodeGen/X86/hidden-vis-5.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s +; <rdar://problem/7383328> + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] + +define hidden void @func() nounwind ssp { +entry: + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i32 @puts(i8*) + +define hidden i32 @main() nounwind ssp { +entry: + %retval = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @func() nounwind + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +; CHECK: .private_extern _func.eh +; CHECK: .private_extern _main.eh diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index c0379d1..ec5236b 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -74,16 +74,16 @@ exit: ; CHECK: yet_more_involved: ; CHECK: jmp .LBB3_1 ; CHECK-NEXT: align -; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: call bar99 ; CHECK-NEXT: call get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jg .LBB3_5 +; CHECK-NEXT: jg .LBB3_6 ; CHECK-NEXT: call block_a_true_func -; CHECK-NEXT: jmp .LBB3_6 -; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: jmp .LBB3_7 ; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: call 
block_a_merge_func ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: call body diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll index a1f38a7..9b53adb 100644 --- a/test/CodeGen/X86/loop-strength-reduce2.ll +++ b/test/CodeGen/X86/loop-strength-reduce2.ll @@ -4,7 +4,7 @@ @flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1] -define void @test(i32 %k, i32 %i) { +define void @test(i32 %k, i32 %i) nounwind { entry: %k_addr.012 = shl i32 %i, 1 ; <i32> [#uses=1] %tmp14 = icmp sgt i32 %k_addr.012, 8192 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll index e340edd..c45a374 100644 --- a/test/CodeGen/X86/loop-strength-reduce3.ll +++ b/test/CodeGen/X86/loop-strength-reduce3.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 | grep cmp | grep 240 ; RUN: llc < %s -march=x86 | grep inc | count 1 -define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) { +define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind { entry: %tmp2955 = icmp sgt i32 %C, 0 ; <i1> [#uses=1] br i1 %tmp2955, label %bb26.outer.us, label %bb40.split diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll index 4ec2a02..b07eeb6 100644 --- a/test/CodeGen/X86/loop-strength-reduce5.ll +++ b/test/CodeGen/X86/loop-strength-reduce5.ll @@ -3,7 +3,7 @@ @X = weak global i16 0 ; <i16*> [#uses=1] @Y = weak global i16 0 ; <i16*> [#uses=1] -define void @foo(i32 %N) { +define void @foo(i32 %N) nounwind { entry: %tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] br i1 %tmp1019, label %bb, label %return diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll index 81da82e..bbafcf7c 100644 --- a/test/CodeGen/X86/loop-strength-reduce6.ll +++ b/test/CodeGen/X86/loop-strength-reduce6.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 | not grep inc -define fastcc i32 @decodeMP3(i32 %isize, i32* %done) { +define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { entry: br i1 false, label %cond_next191, label %cond_true189 diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll new file mode 100644 index 0000000..3f90245 --- /dev/null +++ b/test/CodeGen/X86/object-size.ll @@ -0,0 +1,55 @@ +; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64 + +; ModuleID = 'ts.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0" + +@p = common global i8* null, align 8 ; <i8**> [#uses=4] +@.str = private constant [3 x i8] c"Hi\00" ; <[3 x i8]*> [#uses=1] + +define void @bar() nounwind ssp { +entry: + %tmp = load i8** @p ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i32 0) ; <i64> [#uses=1] + %cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1] +; X64: movq $-1, %rax +; X64: cmpq $-1, %rax + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp1 = load i8** @p ; <i8*> [#uses=1] + %tmp2 = load i8** @p ; <i8*> [#uses=1] + %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1] + br label %cond.end + +cond.false: ; preds = %entry + %tmp3 = load i8** @p ; <i8*> [#uses=1] + %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; 
<i8*> [#uses=1] + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0] + ret void +} + +declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly + +declare i8* @__strcpy_chk(i8*, i8*, i64) ssp + +define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp { +entry: + %retval = alloca i8* ; <i8**> [#uses=2] + %__dest.addr = alloca i8* ; <i8**> [#uses=3] + %__src.addr = alloca i8* ; <i8**> [#uses=2] + store i8* %__dest, i8** %__dest.addr + store i8* %__src, i8** %__src.addr + %tmp = load i8** %__dest.addr ; <i8*> [#uses=1] + %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1] + %tmp2 = load i8** %__dest.addr ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1] + store i8* %call, i8** %retval + %1 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %1 +} diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 6319cb8..21c1a3c 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -145,7 +145,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { ret void ; X64: t9: ; X64: movsd (%rsi), %xmm0 -; X64: movhps %xmm0, (%rdi) +; X64: movaps (%rdi), %xmm1 +; X64: movlhps %xmm0, %xmm1 +; X64: movaps %xmm1, (%rdi) ; X64: ret } diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index 0f32a50..d762392 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t ; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6 -; RUN: grep asm-printer %t | grep 177 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll new file mode 100644 index 0000000..0d86e56 --- /dev/null +++ b/test/CodeGen/X86/tail-opts.ll @@ -0,0 +1,408 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s + +declare void @bar(i32) +declare void @car(i32) +declare void @dar(i32) +declare void @ear(i32) +declare void @far(i32) +declare i1 @qux() + +@GHJK = global i32 0 +@HABC = global i32 0 + +; BranchFolding should tail-merge the stores since they all precede +; direct branches to the same place. + +; CHECK: tail_merge_me: +; CHECK-NOT: GHJK +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: movl $1, HABC(%rip) +; CHECK-NOT: GHJK + +define void @tail_merge_me() nounwind { +entry: + %a = call i1 @qux() + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + store i32 1, i32* @HABC + %c = call i1 @qux() + br i1 %c, label %return, label %altret + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +declare i8* @choose(i8*, i8*); + +; BranchFolding should tail-duplicate the indirect jump to avoid +; redundant branching. 
+ +; CHECK: tail_duplicate_me: +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%rbx + +define void @tail_duplicate_me() nounwind { +entry: + %a = call i1 @qux() + %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return), + i8* blockaddress(@tail_duplicate_me, %altret)) + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + indirectbr i8* %c, [label %return, label %altret] + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +; BranchFolding shouldn't try to merge the tails of two blocks +; with only a branch in common, regardless of the fallthrough situation. + +; CHECK: dont_merge_oddly: +; CHECK-NOT: ret +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe .LBB3_3 +; CHECK-NEXT: ucomiss %xmm2, %xmm0 +; CHECK-NEXT: ja .LBB3_4 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: ucomiss %xmm2, %xmm1 +; CHECK-NEXT: jbe .LBB3_2 +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: ret + +define i1 @dont_merge_oddly(float* %result) nounwind { +entry: + %tmp4 = getelementptr float* %result, i32 2 + %tmp5 = load float* %tmp4, align 4 + %tmp7 = getelementptr float* %result, i32 4 + %tmp8 = load float* %tmp7, align 4 + %tmp10 = getelementptr float* %result, i32 6 + %tmp11 = load float* %tmp10, align 4 + %tmp12 = fcmp olt float %tmp8, %tmp11 + br i1 %tmp12, label %bb, label %bb21 + +bb: + %tmp23469 = fcmp olt float %tmp5, %tmp8 + br i1 %tmp23469, label %bb26, label %bb30 + +bb21: + %tmp23 = fcmp olt float %tmp5, %tmp11 + br i1 %tmp23, label %bb26, label %bb30 + +bb26: + ret i1 0 + +bb30: + ret i1 1 +} + +; Do any-size tail-merging when two candidate blocks will both require +; an unconditional jump to complete a two-way conditional branch. 
+ +; CHECK: c_expand_expr_stmt: +; CHECK: jmp .LBB4_7 +; CHECK-NEXT: .LBB4_12: +; CHECK-NEXT: movq 8(%rax), %rax +; CHECK-NEXT: movb 16(%rax), %al +; CHECK-NEXT: cmpb $16, %al +; CHECK-NEXT: je .LBB4_6 +; CHECK-NEXT: cmpb $23, %al +; CHECK-NEXT: je .LBB4_6 +; CHECK-NEXT: jmp .LBB4_15 +; CHECK-NEXT: .LBB4_14: +; CHECK-NEXT: cmpb $23, %bl +; CHECK-NEXT: jne .LBB4_15 +; CHECK-NEXT: .LBB4_15: + +%0 = type { %struct.rtx_def* } +%struct.lang_decl = type opaque +%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] } +%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* } +%union..2anon = type { i32 } +%union.rtunion = type { i8* } +%union.tree_node = type { %struct.tree_decl } + +define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { +entry: + %tmp4 = load i8* null, align 8 ; <i8> [#uses=3] + switch i8 %tmp4, label %bb3 [ + i8 18, label %bb + ] + +bb: ; preds = %entry + switch i32 undef, label %bb1 [ + i32 0, label %bb2.i + i32 37, label %bb.i + ] + +bb.i: ; preds = %bb + switch i32 undef, label %bb1 [ + i32 0, label %lvalue_p.exit + ] + +bb2.i: ; preds = %bb + br label %bb3 + +lvalue_p.exit: ; preds = %bb.i + %tmp21 = load %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3] + %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1] + %tmp23 = load i8* %tmp22, align 8 ; <i8> [#uses=1] + %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1] + switch i32 %tmp24, label %lvalue_p.exit4 [ + i32 0, label %bb2.i3 + i32 2, label %bb.i1 + ] + +bb.i1: ; preds = %lvalue_p.exit + %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1] + %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2] + %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp29 = load i8* %tmp28, align 8 ; <i8> [#uses=1] + %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1] + switch i32 %tmp30, label %lvalue_p.exit4 [ + i32 0, label %bb2.i.i2 + i32 2, label %bb.i.i + ] + +bb.i.i: ; preds = %bb.i1 + %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1] + %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1] + br label %lvalue_p.exit4 + +bb2.i.i2: ; preds = %bb.i1 + %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1] + %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp39 = load i8* %tmp38, align 8 ; <i8> [#uses=1] + switch i8 %tmp39, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +bb2.i3: ; preds = %lvalue_p.exit + %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1] + %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 
0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp44 = load i8* %tmp43, align 8 ; <i8> [#uses=1] + switch i8 %tmp44, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit + %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1] + %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1] + br i1 %or.cond, label %bb2, label %bb3 + +bb1: ; preds = %bb2.i.i, %bb.i, %bb + %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1] + br i1 %.old, label %bb2, label %bb3 + +bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry + %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0] + unreachable +} + +declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly + +declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind + + +; If one tail merging candidate falls through into the other, +; tail merging is likely profitable regardless of how few +; instructions are involved. This function should have only +; one ret instruction. + +; CHECK: foo: +; CHECK: call func +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: ret + +define void @foo(i1* %V) nounwind { +entry: + %t0 = icmp eq i1* %V, null + br i1 %t0, label %return, label %bb + +bb: + call void @func() + ret void + +return: + ret void +} + +declare void @func() + +; one - One instruction may be tail-duplicated even with optsize. + +; CHECK: one: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) + +@XYZ = external global i32 + +define void @one() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + unreachable + +return: + ret void +} + +; two - Same as one, but with two instructions in the common +; tail instead of one. This is too much to be merged, given +; the optsize attribute. + +; CHECK: two: +; CHECK-NOT: XYZ +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK-NOT: XYZ +; CHECK: ret + +define void @two() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} + +; two_nosize - Same as two, but without the optsize attribute. +; Now two instructions are enough to be tail-duplicated. 
+ +; CHECK: two_nosize: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) + +define void @two_nosize() nounwind { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll index a4f87c0..4923df2 100644 --- a/test/CodeGen/X86/tailcall1.ll +++ b/test/CodeGen/X86/tailcall1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 4 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { entry: ret i32 %a3 @@ -9,3 +9,24 @@ entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] ret i32 %tmp11 } + +declare fastcc i8* @alias_callee() + +define fastcc noalias i8* @noalias_caller() nounwind { + %p = tail call fastcc i8* @alias_callee() + ret i8* %p +} + +declare fastcc noalias i8* @noalias_callee() + +define fastcc i8* @alias_caller() nounwind { + %p = tail call fastcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare fastcc i32 @i32_callee() + +define fastcc i32 @ret_undef() nounwind { + %p = tail call fastcc i32 @i32_callee() + ret i32 undef +} diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll index 556f103..f4930b0 100644 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ b/test/CodeGen/X86/vec_shuffle-3.ll @@ -18,4 +18,3 @@ entry: %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp4 } - diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll index e42b538..cdb030e 100644 --- a/test/CodeGen/X86/vec_zero-2.ll +++ b/test/CodeGen/X86/vec_zero-2.ll @@ -12,8 +12,8 @@ bb4743: ; preds = %bb1664 %tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer ; <<8 x i16>> [#uses=1] %tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64> ; <<2 x i64>> [#uses=1] %tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp5266 = call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp5267 = bitcast <8 x i16> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1] %tmp5294 = and <2 x i64> zeroinitializer, %tmp5267 ; <<2 x i64>> [#uses=1] br label %bb5310 bb5310: ; preds = %bb4743, %bb1664 @@ -21,4 +21,4 @@ bb5310: ; preds = %bb4743, %bb1664 ret i32 0 } -declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone |
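As context for the cmp0.ll change above: the old RUN line piped llc output through grep -v cmp, which succeeds whenever any output line lacks the string "cmp", so it verified almost nothing; the FileCheck version pins down the exact instruction sequence instead. The new test1 case also relies on the identity that a signed x < 1 is the same predicate as x <= 0, which lets the backend use testq (an and of a register with itself, setting only flags) plus setle rather than a cmpq against an immediate. A standalone sketch of that idiom (illustrative; the function name is invented):

; RUN: llc < %s -march=x86-64 | FileCheck %s

define i64 @is_nonpositive(i64 %x) nounwind {
; Signed x < 1 equals x <= 0, so no cmp immediate is needed.
; CHECK: testq %rdi, %rdi
; CHECK: setle %al
  %t = icmp slt i64 %x, 1
  %r = zext i1 %t to i64
  ret i64 %r
}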
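Similarly, the -O0 CHECK lines in object-size.ll (movq $-1, %rax) follow from the llvm.objectsize contract: when the size of the pointed-to object cannot be determined, the call folds to a sentinel, -1 for the maximum-size form (second argument 0) and 0 for the minimum-size form (second argument 1). A minimal sketch of an input that folds this way (illustrative only; the function name is invented):

declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly

define i64 @size_unknown(i8* %p) nounwind {
; %p points at an object of unknown size, so the max form folds to -1.
  %s = call i64 @llvm.objectsize.i64(i8* %p, i32 0)
  ret i64 %s
}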