Diffstat (limited to 'test/CodeGen/PowerPC')
39 files changed, 2123 insertions, 40 deletions
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll index 0003a17..b95ac68 100644 --- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll +++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll @@ -9,9 +9,8 @@ target triple = "powerpc-apple-darwin11.0" define void @foo() nounwind ssp { entry: -; Better: mtctr r12 -; CHECK: mr r12, [[REG:r[0-9]+]] -; CHECK: mtctr [[REG]] +; CHECK: mtctr r12 +; CHECK: bctrl %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1] call void (...)* %0() nounwind br label %return diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll new file mode 100644 index 0000000..9d2e390 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This test checks that the TOC entry symbol names won't clash with the +; global .LC0 and .LC2 symbols defined in the module. + +@.LC0 = internal global [5 x i8] c".LC0\00" +@.LC2 = internal global [5 x i8] c".LC2\00" + +define i32 @foo(double %X, double %Y) nounwind readnone { + ; The 1.0 and 3.0 constants generate two TOC entries + %cmp = fcmp oeq double %X, 1.000000e+00 + %conv = zext i1 %cmp to i32 + %cmp1 = fcmp oeq double %Y, 3.000000e+00 + %conv2 = zext i1 %cmp1 to i32 + %add = add nsw i32 %conv2, %conv + ret i32 %add +} + +; Check the creation of 2 .tc entries for both double constants. They +; should be .LC1 and .LC3 to avoid a name clash with the global constants +; .LC0 and .LC2 +; CHECK: .LC{{[13]}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK: .LC{{[13]}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} diff --git a/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll new file mode 100644 index 0000000..41533a8 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @test(i64 %n) nounwind { +entry: + %0 = alloca i8, i64 %n, align 1 + %1 = alloca i8, i64 %n, align 1 + call void @use(i8* %0, i8* %1) nounwind + ret void +} + +declare void @use(i8*, i8*) + +; Check that we actually have two instances of dynamic stack allocation, +; identified by the stdux used to update the back-chain link.
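For reference, a C-level analogue of the dynalloc test above (an editor's sketch, not code from the patch; the helper 'use' mirrors the IR declaration): two variable-length arrays force two separate dynamic stack allocations, and on powerpc64 each one updates the back-chain link with an stdux.

/* Two VLAs -> two dynamic stack allocations, each emitting an stdux. */
void use(char *, char *);

void test(long n) {
  char a[n];   /* first dynamic allocation  */
  char b[n];   /* second dynamic allocation */
  use(a, b);
}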
+; CHECK: stdux +; CHECK: stdux diff --git a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll new file mode 100644 index 0000000..f841c5f --- /dev/null +++ b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll @@ -0,0 +1,20 @@ +; RUN: llc -mattr=+altivec < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @test(<16 x i8> %v) nounwind { +entry: + %0 = bitcast <16 x i8> %v to i128 + %1 = lshr i128 %0, 96 + %2 = trunc i128 %1 to i32 + ret i32 %2 +} + +; Verify that bitcast handles big-endian platforms correctly +; by checking we load the result from the correct offset + +; CHECK: addi [[REGISTER:[0-9]+]], 1, -16 +; CHECK: stvx 2, 0, [[REGISTER]] +; CHECK: lwz 3, -16(1) +; CHECK: blr + diff --git a/test/CodeGen/PowerPC/asm-Zy.ll b/test/CodeGen/PowerPC/asm-Zy.ll new file mode 100644 index 0000000..691165f --- /dev/null +++ b/test/CodeGen/PowerPC/asm-Zy.ll @@ -0,0 +1,14 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s + +define i32 @zytest(i32 %a) nounwind { +entry: +; CHECK: @zytest + %r = call i32 asm "lwbrx $0, ${1:y}", "=r,Z"(i32 %a) nounwind, !srcloc !0 + ret i32 %r +; CHECK: lwbrx 3, 0, +} + +!0 = metadata !{i32 101688} + diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll index 9a456b6..638059a 100644 --- a/test/CodeGen/PowerPC/big-endian-formal-args.ll +++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll @@ -2,10 +2,10 @@ declare void @bar(i64 %x, i64 %y) -; CHECK: li {{[53]}}, 0 +; CHECK: li 3, 0 ; CHECK: li 4, 2 +; CHECK: li 5, 0 ; CHECK: li 6, 3 -; CHECK: mr {{[53]}}, {{[53]}} define void @foo() { call void @bar(i64 2, i64 3) diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll deleted file mode 100644 index 386c59e..0000000 --- a/test/CodeGen/PowerPC/bl8_elf_nop.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -declare i32 @clock() nounwind - -define i32 @func() { -entry: - %call = call i32 @clock() nounwind - %call2 = add i32 %call, 7 - ret i32 %call2 -} - -; CHECK: bl clock -; CHECK-NEXT: nop - diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll index cc80f83..f19175c 100644 --- a/test/CodeGen/PowerPC/coalesce-ext.ll +++ b/test/CodeGen/PowerPC/coalesce-ext.ll @@ -13,5 +13,6 @@ define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { store volatile i32 %D, i32* %P ; Reuse low bits of extended register, don't extend live range of SUM. 
; CHECK: stw [[EXT]] - ret i32 %D + %R = add i32 %D, %D + ret i32 %R } diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll new file mode 100644 index 0000000..afa1ea8 --- /dev/null +++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1 + +define void @test(i32 %count) nounwind { +entry: +; CHECK: crxor 6, 6, 6 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %cmp2 = icmp sgt i32 %count, 0 + br i1 %cmp2, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] +; CHECK: crxor 6, 6, 6 + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %inc = add nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll new file mode 100644 index 0000000..3e98dbd --- /dev/null +++ b/test/CodeGen/PowerPC/crsave.ll @@ -0,0 +1,49 @@ +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32 +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64 + +declare void @foo() + +define i32 @test_cr2() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 + +define i32 @test_cr234() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09cmp 3,$2,$2\0A\09cmp 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 +; PPC32-NEXT: mtcrf 16, 12 +; PPC32-NEXT: mtcrf 8, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 +; PPC64-NEXT: mtcrf 16, 12 +; PPC64-NEXT: mtcrf 8, 12 + diff --git a/test/CodeGen/PowerPC/emptystruct.ll b/test/CodeGen/PowerPC/emptystruct.ll new file mode 100644 index 0000000..36b4abd --- /dev/null +++ b/test/CodeGen/PowerPC/emptystruct.ll @@ -0,0 +1,51 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests correct handling of empty aggregate parameters and return values. +; An empty parameter passed by value does not consume a protocol register or +; a parameter save area doubleword. An empty parameter passed by reference +; is treated as any other pointer parameter. 
An empty aggregate return value +; is treated as any other aggregate return value, passed via address as a +; hidden parameter in GPR3. In this example, GPR3 contains the return value +; address, GPR4 contains the address of e2, and e1 and e3 are not passed or +; received. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.empty = type {} + +define void @callee(%struct.empty* noalias sret %agg.result, %struct.empty* byval %a1, %struct.empty* %a2, %struct.empty* byval %a3) nounwind { +entry: + %a2.addr = alloca %struct.empty*, align 8 + store %struct.empty* %a2, %struct.empty** %a2.addr, align 8 + %0 = load %struct.empty** %a2.addr, align 8 + %1 = bitcast %struct.empty* %agg.result to i8* + %2 = bitcast %struct.empty* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false) + ret void +} + +; CHECK: callee: +; CHECK: std 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: blr + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @caller(%struct.empty* noalias sret %agg.result) nounwind { +entry: + %e1 = alloca %struct.empty, align 1 + %e2 = alloca %struct.empty, align 1 + %e3 = alloca %struct.empty, align 1 + call void @callee(%struct.empty* sret %agg.result, %struct.empty* byval %e1, %struct.empty* %e2, %struct.empty* byval %e3) + ret void +} + +; CHECK: caller: +; CHECK: addi 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: bl callee diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll new file mode 100644 index 0000000..b5631a1 --- /dev/null +++ b/test/CodeGen/PowerPC/floatPSA.ll @@ -0,0 +1,97 @@ +; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +; This verifies that single-precision floating point values that can't +; be passed in registers are stored in the rightmost word of the parameter +; save area slot. There are 13 architected floating-point registers, so +; the 14th is passed in storage. The address of the 14th argument is +; 48 (fixed size of the linkage area) + 13 * 8 (first 13 args) + 4 +; (offset to second word) = 156. 
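The offset arithmetic above can be sanity-checked with a few lines of C (an illustration of the computation only; the 48-byte linkage area and 8-byte parameter doublewords are the 64-bit ELF ABI values stated in the comment):

#include <stdio.h>

int main(void) {
  int linkage = 48;      /* fixed size of the linkage area        */
  int first13 = 13 * 8;  /* one doubleword for each of args 1-13  */
  int word2   = 4;       /* offset to the rightmost (second) word */
  printf("%d\n", linkage + first13 + word2);  /* prints 156 */
  return 0;
}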
+ +define float @bar(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m, float %n) nounwind { +entry: + %a.addr = alloca float, align 4 + %b.addr = alloca float, align 4 + %c.addr = alloca float, align 4 + %d.addr = alloca float, align 4 + %e.addr = alloca float, align 4 + %f.addr = alloca float, align 4 + %g.addr = alloca float, align 4 + %h.addr = alloca float, align 4 + %i.addr = alloca float, align 4 + %j.addr = alloca float, align 4 + %k.addr = alloca float, align 4 + %l.addr = alloca float, align 4 + %m.addr = alloca float, align 4 + %n.addr = alloca float, align 4 + store float %a, float* %a.addr, align 4 + store float %b, float* %b.addr, align 4 + store float %c, float* %c.addr, align 4 + store float %d, float* %d.addr, align 4 + store float %e, float* %e.addr, align 4 + store float %f, float* %f.addr, align 4 + store float %g, float* %g.addr, align 4 + store float %h, float* %h.addr, align 4 + store float %i, float* %i.addr, align 4 + store float %j, float* %j.addr, align 4 + store float %k, float* %k.addr, align 4 + store float %l, float* %l.addr, align 4 + store float %m, float* %m.addr, align 4 + store float %n, float* %n.addr, align 4 + %0 = load float* %n.addr, align 4 + ret float %0 +} + +; CHECK: lfs {{[0-9]+}}, 156(1) + +define float @foo() nounwind { +entry: + %a = alloca float, align 4 + %b = alloca float, align 4 + %c = alloca float, align 4 + %d = alloca float, align 4 + %e = alloca float, align 4 + %f = alloca float, align 4 + %g = alloca float, align 4 + %h = alloca float, align 4 + %i = alloca float, align 4 + %j = alloca float, align 4 + %k = alloca float, align 4 + %l = alloca float, align 4 + %m = alloca float, align 4 + %n = alloca float, align 4 + store float 1.000000e+00, float* %a, align 4 + store float 2.000000e+00, float* %b, align 4 + store float 3.000000e+00, float* %c, align 4 + store float 4.000000e+00, float* %d, align 4 + store float 5.000000e+00, float* %e, align 4 + store float 6.000000e+00, float* %f, align 4 + store float 7.000000e+00, float* %g, align 4 + store float 8.000000e+00, float* %h, align 4 + store float 9.000000e+00, float* %i, align 4 + store float 1.000000e+01, float* %j, align 4 + store float 1.100000e+01, float* %k, align 4 + store float 1.200000e+01, float* %l, align 4 + store float 1.300000e+01, float* %m, align 4 + store float 1.400000e+01, float* %n, align 4 + %0 = load float* %a, align 4 + %1 = load float* %b, align 4 + %2 = load float* %c, align 4 + %3 = load float* %d, align 4 + %4 = load float* %e, align 4 + %5 = load float* %f, align 4 + %6 = load float* %g, align 4 + %7 = load float* %h, align 4 + %8 = load float* %i, align 4 + %9 = load float* %j, align 4 + %10 = load float* %k, align 4 + %11 = load float* %l, align 4 + %12 = load float* %m, align 4 + %13 = load float* %n, align 4 + %call = call float @bar(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) + ret float %call +} + +; Note that stw is used instead of stfs because the value is a simple +; constant that can be created with a load-immediate in a GPR. +; CHECK: stw {{[0-9]+}}, 156(1) + diff --git a/test/CodeGen/PowerPC/fsl-e500mc.ll b/test/CodeGen/PowerPC/fsl-e500mc.ll new file mode 100644 index 0000000..09b7e41 --- /dev/null +++ b/test/CodeGen/PowerPC/fsl-e500mc.ll @@ -0,0 +1,22 @@ +; +; Test support for Freescale e500mc and its higher memcpy inlining thresholds. 
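A C-level analogue of the e500mc memcpy test that follows (an editor's sketch; the layout matches %struct.teststruct, 12 ints plus one more int = 52 bytes): a plain struct assignment lowers to an llvm.memcpy of 52 bytes, which the e500mc's higher inlining threshold should expand inline instead of emitting a call to memcpy.

struct teststruct { int a[12]; int b; };  /* 52 bytes */

void copy(struct teststruct *out, const struct teststruct *in) {
  *out = *in;  /* becomes a 52-byte memcpy; expected to be inlined */
}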
+; +; RUN: llc -mcpu=e500mc < %s 2>&1 | FileCheck %s +; CHECK-NOT: not a recognized processor for this target + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-fsl-linux" + +%struct.teststruct = type { [12 x i32], i32 } + +define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind { +entry: +; CHECK: @copy +; CHECK-NOT: bl memcpy + %0 = bitcast %struct.teststruct* %agg.result to i8* + %1 = bitcast %struct.teststruct* %in to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 52, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/CodeGen/PowerPC/fsl-e5500.ll b/test/CodeGen/PowerPC/fsl-e5500.ll new file mode 100644 index 0000000..d47d8c8 --- /dev/null +++ b/test/CodeGen/PowerPC/fsl-e5500.ll @@ -0,0 +1,22 @@ +; +; Test support for Freescale e5500 and its higher memcpy inlining thresholds. +; +; RUN: llc -mcpu=e5500 < %s 2>&1 | FileCheck %s +; CHECK-NOT: not a recognized processor for this target + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-fsl-linux" + +%struct.teststruct = type { [24 x i32], i32 } + +define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind { +entry: +; CHECK: @copy +; CHECK-NOT: bl memcpy + %0 = bitcast %struct.teststruct* %agg.result to i8* + %1 = bitcast %struct.teststruct* %in to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 100, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll new file mode 100644 index 0000000..5a0c072 --- /dev/null +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test(i64 %x) nounwind readnone { +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +; Verify that we get the code sequence needed to avoid double-rounding. +; Note that only parts of the sequence are checked for here, to allow +; for minor code generation differences. + +; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53 +; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1 +; CHECK: cmpldi 0, [[REGISTER]], 1 +; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1 +; CHECK: std [[REGISTER]], -{{[0-9]+}}(1) + + +; Also check that with -enable-unsafe-fp-math we do not get that extra +; code sequence. Simply verify that there is no "isel" present. 
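To illustrate the double-rounding hazard that the isel sequence above guards against, consider this small C program (the constant is an editor-chosen example, not taken from the test): converting a 64-bit integer to float through an intermediate double can round twice and give a different result than a single correctly-rounded conversion.

#include <stdio.h>

int main(void) {
  long long x = (1LL << 60) + (1LL << 36) + 1;
  float direct = (float)x;          /* one correctly-rounded conversion */
  float twice  = (float)(double)x;  /* rounds to double, then to float  */
  /* On a conforming implementation: 0x1.000002p+60 vs 0x1p+60. */
  printf("%a\n%a\n", (double)direct, (double)twice);
  return 0;
}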
+ +; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; UNSAFE-NOT: isel + diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll index e1ff82d..59c3388 100644 --- a/test/CodeGen/PowerPC/inlineasm-copy.ll +++ b/test/CodeGen/PowerPC/inlineasm-copy.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=ppc32 | not grep mr +; RUN: llc < %s -march=ppc32 -verify-machineinstrs | FileCheck %s +; CHECK-NOT: mr define i32 @test(i32 %Y, i32 %X) { entry: %tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1] @@ -12,3 +13,9 @@ entry: ret i32 %tmp1 } +; CHECK: test3 +define i32 @test3(i32 %Y, i32 %X) { +entry: + %tmp1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "foo $0, $1", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"( i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y ) ; <i32> [#uses=1] + ret i32 1 +} diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll index 6c82723..d2887b9 100644 --- a/test/CodeGen/PowerPC/int-fp-conv-1.ll +++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=ppc64 | grep __floatditf +; RUN: llc < %s -march=ppc64 | FileCheck %s +; CHECK-NOT: __floatditf define i64 @__fixunstfdi(ppc_fp128 %a) nounwind { entry: diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll new file mode 100644 index 0000000..62aa7cf --- /dev/null +++ b/test/CodeGen/PowerPC/jaggedstructs.ll @@ -0,0 +1,48 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests receiving and re-passing parameters consisting of structures +; of size 3, 5, 6, and 7. They are to be found/placed right-adjusted in
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S3 = type { [3 x i8] } +%struct.S5 = type { [5 x i8] } +%struct.S6 = type { [6 x i8] } +%struct.S7 = type { [7 x i8] } + +define void @test(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) nounwind { +entry: + call void @check(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) + ret void +} + +; CHECK: std 6, 216(1) +; CHECK: std 5, 208(1) +; CHECK: std 4, 200(1) +; CHECK: std 3, 192(1) +; CHECK: lbz {{[0-9]+}}, 199(1) +; CHECK: stb {{[0-9]+}}, 55(1) +; CHECK: lhz {{[0-9]+}}, 197(1) +; CHECK: sth {{[0-9]+}}, 53(1) +; CHECK: lbz {{[0-9]+}}, 207(1) +; CHECK: stb {{[0-9]+}}, 63(1) +; CHECK: lwz {{[0-9]+}}, 203(1) +; CHECK: stw {{[0-9]+}}, 59(1) +; CHECK: lhz {{[0-9]+}}, 214(1) +; CHECK: sth {{[0-9]+}}, 70(1) +; CHECK: lwz {{[0-9]+}}, 210(1) +; CHECK: stw {{[0-9]+}}, 66(1) +; CHECK: lbz {{[0-9]+}}, 223(1) +; CHECK: stb {{[0-9]+}}, 79(1) +; CHECK: lhz {{[0-9]+}}, 221(1) +; CHECK: sth {{[0-9]+}}, 77(1) +; CHECK: lwz {{[0-9]+}}, 217(1) +; CHECK: stw {{[0-9]+}}, 73(1) +; CHECK: ld 6, 72(1) +; CHECK: ld 5, 64(1) +; CHECK: ld 4, 56(1) +; CHECK: ld 3, 48(1) + +declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval) diff --git a/test/CodeGen/PowerPC/misched.ll b/test/CodeGen/PowerPC/misched.ll new file mode 100644 index 0000000..d6fb3b3 --- /dev/null +++ b/test/CodeGen/PowerPC/misched.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -enable-misched -verify-machineinstrs +; PR14302 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +@b = external global [16000 x double], align 32 + +define void @pr14302() nounwind { +entry: + tail call void @putchar() nounwind + br label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.body24.i + +for.body24.i: ; preds = %for.body24.i, %for.body + store double 1.000000e+00, double* undef, align 8 + br i1 undef, label %for.body24.i58, label %for.body24.i + +for.body24.i58: ; preds = %for.body24.i58, %for.body24.i + %arrayidx26.i55.1 = getelementptr inbounds [16000 x double]* @b, i64 0, i64 undef + store double 1.000000e+00, double* %arrayidx26.i55.1, align 8 + br i1 undef, label %for.body24.i64, label %for.body24.i58 + +for.body24.i64: ; preds = %for.body24.i64, %for.body24.i58 + %exitcond.2489 = icmp eq i32 0, 16000 + br i1 %exitcond.2489, label %for.body24.i70, label %for.body24.i64 + +for.body24.i70: ; preds = %for.body24.i70, %for.body24.i64 + br i1 undef, label %for.body24.i76, label %for.body24.i70 + +for.body24.i76: ; preds = %for.body24.i76, %for.body24.i70 + br i1 undef, label %set1d.exit77, label %for.body24.i76 + +set1d.exit77: ; preds = %for.body24.i76 + br label %for.body29 + +for.body29: ; preds = %for.body29, %set1d.exit77 + br i1 undef, label %for.end35, label %for.body29 + +for.end35: ; preds = %for.body29 + ret void +} + +declare void @putchar() diff --git a/test/CodeGen/PowerPC/novrsave.ll b/test/CodeGen/PowerPC/novrsave.ll new file mode 100644 index 0000000..a70576a --- /dev/null +++ b/test/CodeGen/PowerPC/novrsave.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +; This verifies that 
the code to update VRSAVE has been removed for SVR4. + +define <4 x float> @bar(<4 x float> %v) nounwind { +entry: + %v.addr = alloca <4 x float>, align 16 + store <4 x float> %v, <4 x float>* %v.addr, align 16 + %0 = load <4 x float>* %v.addr, align 16 + ret <4 x float> %0 +} + +; CHECK-NOT: mfspr +; CHECK-NOT: mtspr diff --git a/test/CodeGen/PowerPC/ppc64-abi-extend.ll b/test/CodeGen/PowerPC/ppc64-abi-extend.ll new file mode 100644 index 0000000..8baf1c6 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-abi-extend.ll @@ -0,0 +1,97 @@ +; Verify that i32 argument/return values are extended to i64 + +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@si = common global i32 0, align 4 +@ui = common global i32 0, align 4 + +declare void @arg_si(i32 signext) +declare void @arg_ui(i32 zeroext) + +declare signext i32 @ret_si() +declare zeroext i32 @ret_ui() + +define void @pass_arg_si() nounwind { +entry: + %0 = load i32* @si, align 4 + tail call void @arg_si(i32 signext %0) nounwind + ret void +} +; CHECK: @pass_arg_si +; CHECK: lwa 3, +; CHECK: bl arg_si + +define void @pass_arg_ui() nounwind { +entry: + %0 = load i32* @ui, align 4 + tail call void @arg_ui(i32 zeroext %0) nounwind + ret void +} +; CHECK: @pass_arg_ui +; CHECK: lwz 3, +; CHECK: bl arg_ui + +define i64 @use_arg_si(i32 signext %x) nounwind readnone { +entry: + %conv = sext i32 %x to i64 + ret i64 %conv +} +; CHECK: @use_arg_si +; CHECK: %entry +; CHECK-NEXT: blr + +define i64 @use_arg_ui(i32 zeroext %x) nounwind readnone { +entry: + %conv = zext i32 %x to i64 + ret i64 %conv +} +; CHECK: @use_arg_ui +; CHECK: %entry +; CHECK-NEXT: blr + +define signext i32 @pass_ret_si() nounwind readonly { +entry: + %0 = load i32* @si, align 4 + ret i32 %0 +} +; CHECK: @pass_ret_si +; CHECK: lwa 3, +; CHECK: blr + +define zeroext i32 @pass_ret_ui() nounwind readonly { +entry: + %0 = load i32* @ui, align 4 + ret i32 %0 +} +; CHECK: @pass_ret_ui +; CHECK: lwz 3, +; CHECK: blr + +define i64 @use_ret_si() nounwind { +entry: + %call = tail call signext i32 @ret_si() nounwind + %conv = sext i32 %call to i64 + ret i64 %conv +} +; CHECK: @use_ret_si +; CHECK: bl ret_si +; This is to verify that the return register (3) set up by the ret_si +; call is passed on unmodified as the return value of use_ret_si. +; CHECK-NOT: 3 +; CHECK: blr + +define i64 @use_ret_ui() nounwind { +entry: + %call = tail call zeroext i32 @ret_ui() nounwind + %conv = zext i32 %call to i64 + ret i64 %conv +} +; CHECK: @use_ret_ui +; CHECK: bl ret_ui +; This is to verify that the return register (3) set up by the ret_ui +; call is passed on unmodified as the return value of use_ret_ui. +; CHECK-NOT: 3 +; CHECK: blr + diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll new file mode 100644 index 0000000..10b70d0 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; Verify internal alignment of long double in a struct. The double +; argument arrives in GPR3; GPR4 is skipped; GPRs 5 and 6 contain +; the long double. Check that these are stored to the proper locations +; in the parameter save area and loaded from there for return in FPR1/2.
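A hypothetical C equivalent of the %struct.S test below (ppc_fp128 corresponds to the 128-bit IBM long double on powerpc64-linux; it is the 16-byte alignment of the second member that forces GPR4 to be skipped):

struct S {
  double a;       /* arrives in GPR3                                  */
  long double b;  /* arrives in GPR5/GPR6; GPR4 skipped for alignment */
};

long double test(struct S x) {  /* result returned in FPR1/FPR2 */
  return x.b;
}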
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S = type { double, ppc_fp128 } + +define ppc_fp128 @test(%struct.S* byval %x) nounwind { +entry: + %b = getelementptr inbounds %struct.S* %x, i32 0, i32 1 + %0 = load ppc_fp128* %b, align 16 + ret ppc_fp128 %0 +} + +; CHECK: std 6, 72(1) +; CHECK: std 5, 64(1) +; CHECK: std 4, 56(1) +; CHECK: std 3, 48(1) +; CHECK: lfd 1, 64(1) +; CHECK: lfd 2, 72(1) + diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll new file mode 100644 index 0000000..c382edbb --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -march=ppc64 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @foo() nounwind readnone noinline { + ret void +} + +define weak void @foo_weak() nounwind { + ret void +} + +; Calls to a local function do not require the TOC restore 'nop' +define void @test_direct() nounwind readnone { +; CHECK: test_direct: + tail call void @foo() nounwind +; CHECK: bl foo +; CHECK-NOT: nop + ret void +} + +; Calls to a weak function require a TOC restore 'nop' because it +; may be overridden in a different module. +define void @test_weak() nounwind readnone { +; CHECK: test_weak: + tail call void @foo_weak() nounwind +; CHECK: bl foo +; CHECK-NEXT: nop + ret void +} + +; Indirect calls require full stub creation +define void @test_indirect(void ()* nocapture %fp) nounwind { +; CHECK: test_indirect: + tail call void %fp() nounwind +; CHECK: ld [[FP:[0-9]+]], 0(3) +; CHECK: ld 11, 16(3) +; CHECK: ld 2, 8(3) +; CHECK-NEXT: mtctr [[FP]] +; CHECK-NEXT: bctrl +; CHECK-NEXT: ld 2, 40(1) + ret void +} + +; Calls to absolute addresses should also have the TOC restore 'nop' +define void @test_abs() nounwind { +; CHECK: test_abs: + tail call void inttoptr (i64 1024 to void ()*)() nounwind +; CHECK: bla 1024 +; CHECK-NEXT: nop + ret void +} + +declare double @sin(double) nounwind + +; Calls to external functions should also have a 'nop' +define double @test_external(double %x) nounwind { +; CHECK: test_external: + %call = tail call double @sin(double %x) nounwind +; CHECK: bl sin +; CHECK-NEXT: nop + ret double %call +} diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll deleted file mode 100644 index d5c4d46..0000000 --- a/test/CodeGen/PowerPC/ppc64-ind-call.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc < %s -march=ppc64 | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -define void @test1() { -entry: - %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10) - unreachable -} - -; CHECK: @test1 -; CHECK: ld 11, 0(3) -; CHECK: ld 2, 8(3) -; CHECK: bctrl -; CHECK: ld 2, 40(1) - diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll index e5aa1f1..e1d50ba 100644 --- a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll +++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll @@ -5,6 +5,7 @@ ; CHECK-NEXT: .align 3 ; CHECK-NEXT: .quad .L.test1 ; CHECK-NEXT: .quad .TOC.@tocbase +; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .text ; CHECK-NEXT: .L.test1: diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll
b/test/CodeGen/PowerPC/ppc64-toc.ll new file mode 100644 index 0000000..a29bdcb --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-toc.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@double_array = global [32 x double] zeroinitializer, align 8 +@number64 = global i64 10, align 8 +@internal_static_var.x = internal unnamed_addr global i64 0, align 8 + +define i64 @access_int64(i64 %a) nounwind readonly { +entry: +; CHECK: access_int64: +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .quad .L.access_int64 +; CHECK-NEXT: .quad .TOC.@tocbase +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .text + %0 = load i64* @number64, align 8 +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @internal_static_var(i64 %a) nounwind { +entry: +; CHECK: internal_static_var: +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %0 = load i64* @internal_static_var.x, align 8 + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i32 @access_double(double %a) nounwind readnone { +entry: +; CHECK: access_double: +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = fcmp oeq double %a, 2.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +define i32 @access_double_array(double %a, i32 %i) nounwind readonly { +entry: +; CHECK: access_double_array: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom + %0 = load double* %arrayidx, align 8 +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = fcmp oeq double %0, %a + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; Check the creation of 4 .tc entries: +; * int64_t global 'number64' +; * double constant 2.0 +; * double array 'double_array' +; * static int64_t 'x' accessed within '@internal_static_var' +; CHECK: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} diff --git a/test/CodeGen/PowerPC/ppc64-zext.ll b/test/CodeGen/PowerPC/ppc64-zext.ll new file mode 100644 index 0000000..eb55445 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-zext.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux" + +define i64 @fun(i32 %arg32) nounwind { +entry: +; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 + %o = zext i32 %arg32 to i64 + ret i64 %o +} + diff --git a/test/CodeGen/PowerPC/pr12757.ll b/test/CodeGen/PowerPC/pr12757.ll new file mode 100644 index 0000000..c344656 --- /dev/null +++ b/test/CodeGen/PowerPC/pr12757.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @__flt_rounds() nounwind { +entry: + %0 = tail call i64 asm sideeffect "mffs $0", "=f"() nounwind + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK: @__flt_rounds +; CHECK: mffs + diff --git a/test/CodeGen/PowerPC/pr13641.ll 
b/test/CodeGen/PowerPC/pr13641.ll new file mode 100644 index 0000000..c4d3f3a --- /dev/null +++ b/test/CodeGen/PowerPC/pr13641.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @foo() nounwind { + ret void +} + +; CHECK: blr +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .quad 0 diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll new file mode 100644 index 0000000..3ae7385 --- /dev/null +++ b/test/CodeGen/PowerPC/pr13891.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.foo = type { i8, i8 } + +define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline { +; CHECK: _Z5check3foos: +; CHECK: sth 3, {{[0-9]+}}(1) +; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1) +entry: + %0 = bitcast %struct.foo* %f to i16* + %1 = load i16* %0, align 2 + %bf.val.sext = ashr i16 %1, 8 + %cmp = icmp eq i16 %bf.val.sext, %i + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %conv = sext i16 %bf.val.sext to i32 + tail call void @exit(i32 %conv) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +declare void @exit(i32) diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll new file mode 100644 index 0000000..520921f --- /dev/null +++ b/test/CodeGen/PowerPC/remat-imm.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [6 x i8] c"%d,%d\00", align 1 + +define i32 @main() nounwind { +entry: +; CHECK: li 4, 128 +; CHECK-NOT: mr 4, {{.*}} + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll new file mode 100644 index 0000000..884d3a8 --- /dev/null +++ b/test/CodeGen/PowerPC/structsinmem.ll @@ -0,0 +1,227 @@ +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s + +; FIXME: The code generation for packed structs is very poor because the +; PowerPC target wrongly rejects all unaligned loads. This test case will +; need to be revised when that is fixed. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s1 = type { i8 } +%struct.s2 = type { i16 } +%struct.s4 = type { i32 } +%struct.t1 = type { i8 } +%struct.t3 = type <{ i16, i8 }> +%struct.t5 = type <{ i32, i8 }> +%struct.t6 = type <{ i32, i16 }> +%struct.t7 = type <{ i32, i16, i8 }> +%struct.s3 = type { i16, i8 } +%struct.s5 = type { i32, i8 } +%struct.s6 = type { i32, i16 } +%struct.s7 = type { i32, i16, i8 } +%struct.t2 = type <{ i16 }> +%struct.t4 = type <{ i32 }> + +@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1 +@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2 +@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2 +@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4 +@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4 +@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4 +@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4 +@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1 +@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1 +@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1 +@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1 +@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1 +@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1 +@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1 + +define i32 @caller1() nounwind { +entry: + %p1 = alloca %struct.s1, align 1 + %p2 = alloca %struct.s2, align 2 + %p3 = alloca %struct.s3, align 2 + %p4 = alloca %struct.s4, align 4 + %p5 = alloca %struct.s5, align 4 + %p6 = alloca %struct.s6, align 4 + %p7 = alloca %struct.s7, align 4 + %0 = bitcast %struct.s1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.s2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false) + %2 = bitcast %struct.s3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false) + %3 = bitcast %struct.s4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false) + %4 = bitcast %struct.s5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false) + %5 = bitcast %struct.s6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false) + %6 = bitcast %struct.s7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false) + %call = call i32 @callee1(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) + ret i32 %call + +; CHECK: stb 
{{[0-9]+}}, 119(1) +; CHECK: sth {{[0-9]+}}, 126(1) +; CHECK: stw {{[0-9]+}}, 132(1) +; CHECK: stw {{[0-9]+}}, 140(1) +; CHECK: std {{[0-9]+}}, 144(1) +; CHECK: std {{[0-9]+}}, 152(1) +; CHECK: std {{[0-9]+}}, 160(1) +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define internal i32 @callee1(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind { +entry: + %z1.addr = alloca i32, align 4 + %z2.addr = alloca i32, align 4 + %z3.addr = alloca i32, align 4 + %z4.addr = alloca i32, align 4 + %z5.addr = alloca i32, align 4 + %z6.addr = alloca i32, align 4 + %z7.addr = alloca i32, align 4 + %z8.addr = alloca i32, align 4 + store i32 %z1, i32* %z1.addr, align 4 + store i32 %z2, i32* %z2.addr, align 4 + store i32 %z3, i32* %z3.addr, align 4 + store i32 %z4, i32* %z4.addr, align 4 + store i32 %z5, i32* %z5.addr, align 4 + store i32 %z6, i32* %z6.addr, align 4 + store i32 %z7, i32* %z7.addr, align 4 + store i32 %z8, i32* %z8.addr, align 4 + %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 2 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 4 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 4 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 4 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 4 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lbz {{[0-9]+}}, 119(1) +; CHECK: lha {{[0-9]+}}, 132(1) +; CHECK: lwz {{[0-9]+}}, 140(1) +; CHECK: lwz {{[0-9]+}}, 144(1) +; CHECK: lwz {{[0-9]+}}, 152(1) +; CHECK: lwz {{[0-9]+}}, 160(1) +} + +define i32 @caller2() nounwind { +entry: + %p1 = alloca %struct.t1, align 1 + %p2 = alloca %struct.t2, align 1 + %p3 = alloca %struct.t3, align 1 + %p4 = alloca %struct.t4, align 1 + %p5 = alloca %struct.t5, align 1 + %p6 = alloca %struct.t6, align 1 + %p7 = alloca %struct.t7, align 1 + %0 = bitcast %struct.t1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.t2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false) + %2 = bitcast %struct.t3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false) + %3 = bitcast %struct.t4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false) + %4 = bitcast %struct.t5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false) + %5 = bitcast %struct.t6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* 
@caller2.p6 to i8*), i64 6, i32 1, i1 false) + %6 = bitcast %struct.t7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false) + %call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) + ret i32 %call + +; CHECK: stb {{[0-9]+}}, 119(1) +; CHECK: sth {{[0-9]+}}, 126(1) +; CHECK: stb {{[0-9]+}}, 135(1) +; CHECK: sth {{[0-9]+}}, 133(1) +; CHECK: stw {{[0-9]+}}, 140(1) +; CHECK: stb {{[0-9]+}}, 151(1) +; CHECK: stw {{[0-9]+}}, 147(1) +; CHECK: sth {{[0-9]+}}, 158(1) +; CHECK: stw {{[0-9]+}}, 154(1) +; CHECK: stb {{[0-9]+}}, 167(1) +; CHECK: sth {{[0-9]+}}, 165(1) +; CHECK: stw {{[0-9]+}}, 161(1) +} + +define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { +entry: + %z1.addr = alloca i32, align 4 + %z2.addr = alloca i32, align 4 + %z3.addr = alloca i32, align 4 + %z4.addr = alloca i32, align 4 + %z5.addr = alloca i32, align 4 + %z6.addr = alloca i32, align 4 + %z7.addr = alloca i32, align 4 + %z8.addr = alloca i32, align 4 + store i32 %z1, i32* %z1.addr, align 4 + store i32 %z2, i32* %z2.addr, align 4 + store i32 %z3, i32* %z3.addr, align 4 + store i32 %z4, i32* %z4.addr, align 4 + store i32 %z5, i32* %z5.addr, align 4 + store i32 %z6, i32* %z6.addr, align 4 + store i32 %z7, i32* %z7.addr, align 4 + store i32 %z8, i32* %z8.addr, align 4 + %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 1 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 1 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 1 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 1 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 1 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 1 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: lbz {{[0-9]+}}, 149(1) +; CHECK: lbz {{[0-9]+}}, 150(1) +; CHECK: lbz {{[0-9]+}}, 147(1) +; CHECK: lbz {{[0-9]+}}, 148(1) +; CHECK: lbz {{[0-9]+}}, 133(1) +; CHECK: lbz {{[0-9]+}}, 134(1) +; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lbz {{[0-9]+}}, 119(1) +; CHECK: lwz {{[0-9]+}}, 140(1) +; CHECK: lhz {{[0-9]+}}, 154(1) +; CHECK: lhz {{[0-9]+}}, 156(1) +; CHECK: lbz {{[0-9]+}}, 163(1) +; CHECK: lbz {{[0-9]+}}, 164(1) +; CHECK: lbz {{[0-9]+}}, 161(1) +; CHECK: lbz {{[0-9]+}}, 162(1) +} diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll new file mode 100644 index 0000000..ef706af --- /dev/null +++ b/test/CodeGen/PowerPC/structsinregs.ll @@ -0,0 +1,213 @@ +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s + +; FIXME: The code generation for packed structs is very poor because the +; PowerPC target wrongly 
rejects all unaligned loads. This test case will +; need to be revised when that is fixed. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s1 = type { i8 } +%struct.s2 = type { i16 } +%struct.s4 = type { i32 } +%struct.t1 = type { i8 } +%struct.t3 = type <{ i16, i8 }> +%struct.t5 = type <{ i32, i8 }> +%struct.t6 = type <{ i32, i16 }> +%struct.t7 = type <{ i32, i16, i8 }> +%struct.s3 = type { i16, i8 } +%struct.s5 = type { i32, i8 } +%struct.s6 = type { i32, i16 } +%struct.s7 = type { i32, i16, i8 } +%struct.t2 = type <{ i16 }> +%struct.t4 = type <{ i32 }> + +@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1 +@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2 +@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2 +@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4 +@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4 +@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4 +@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4 +@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1 +@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1 +@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1 +@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1 +@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1 +@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1 +@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1 + +define i32 @caller1() nounwind { +entry: + %p1 = alloca %struct.s1, align 1 + %p2 = alloca %struct.s2, align 2 + %p3 = alloca %struct.s3, align 2 + %p4 = alloca %struct.s4, align 4 + %p5 = alloca %struct.s5, align 4 + %p6 = alloca %struct.s6, align 4 + %p7 = alloca %struct.s7, align 4 + %0 = bitcast %struct.s1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.s2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false) + %2 = bitcast %struct.s3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false) + %3 = bitcast %struct.s4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false) + %4 = bitcast %struct.s5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false) + %5 = bitcast %struct.s6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false) + %6 = bitcast %struct.s7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false) + %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) + ret 
i32 %call + +; CHECK: ld 9, 128(31) +; CHECK: ld 8, 136(31) +; CHECK: ld 7, 144(31) +; CHECK: lwz 6, 152(31) +; CHECK: lwz 5, 160(31) +; CHECK: lhz 4, 168(31) +; CHECK: lbz 3, 176(31) +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define internal i32 @callee1(%struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind { +entry: + %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 2 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 4 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 4 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 4 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 4 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: std 9, 96(1) +; CHECK: std 8, 88(1) +; CHECK: std 7, 80(1) +; CHECK: stw 6, 76(1) +; CHECK: stw 5, 68(1) +; CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) +; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lbz {{[0-9]+}}, 55(1) +; CHECK: lha {{[0-9]+}}, 68(1) +; CHECK: lwz {{[0-9]+}}, 76(1) +; CHECK: lwz {{[0-9]+}}, 80(1) +; CHECK: lwz {{[0-9]+}}, 88(1) +; CHECK: lwz {{[0-9]+}}, 96(1) +} + +define i32 @caller2() nounwind { +entry: + %p1 = alloca %struct.t1, align 1 + %p2 = alloca %struct.t2, align 1 + %p3 = alloca %struct.t3, align 1 + %p4 = alloca %struct.t4, align 1 + %p5 = alloca %struct.t5, align 1 + %p6 = alloca %struct.t6, align 1 + %p7 = alloca %struct.t7, align 1 + %0 = bitcast %struct.t1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.t2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false) + %2 = bitcast %struct.t3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false) + %3 = bitcast %struct.t4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false) + %4 = bitcast %struct.t5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false) + %5 = bitcast %struct.t6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* @caller2.p6 to i8*), i64 6, i32 1, i1 false) + %6 = bitcast %struct.t7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false) + %call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) + ret i32 %call + +; CHECK: stb {{[0-9]+}}, 71(1) +; CHECK: sth {{[0-9]+}}, 69(1) +; CHECK: stb {{[0-9]+}}, 87(1) +; CHECK: stw {{[0-9]+}}, 83(1) 
+; CHECK: sth {{[0-9]+}}, 94(1) +; CHECK: stw {{[0-9]+}}, 90(1) +; CHECK: stb {{[0-9]+}}, 103(1) +; CHECK: sth {{[0-9]+}}, 101(1) +; CHECK: stw {{[0-9]+}}, 97(1) +; CHECK: ld 9, 96(1) +; CHECK: ld 8, 88(1) +; CHECK: ld 7, 80(1) +; CHECK: lwz 6, 152(31) +; CHECK: ld 5, 64(1) +; CHECK: lhz 4, 168(31) +; CHECK: lbz 3, 176(31) +} + +define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { +entry: + %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 1 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 1 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 1 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 1 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 1 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 1 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: std 9, 96(1) +; CHECK: std 8, 88(1) +; CHECK: std 7, 80(1) +; CHECK: stw 6, 76(1) +; CHECK: std 5, 64(1) +; CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) +; CHECK: lbz {{[0-9]+}}, 85(1) +; CHECK: lbz {{[0-9]+}}, 86(1) +; CHECK: lbz {{[0-9]+}}, 83(1) +; CHECK: lbz {{[0-9]+}}, 84(1) +; CHECK: lbz {{[0-9]+}}, 69(1) +; CHECK: lbz {{[0-9]+}}, 70(1) +; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lbz {{[0-9]+}}, 55(1) +; CHECK: lwz {{[0-9]+}}, 76(1) +; CHECK: lhz {{[0-9]+}}, 90(1) +; CHECK: lhz {{[0-9]+}}, 92(1) +; CHECK: lbz {{[0-9]+}}, 99(1) +; CHECK: lbz {{[0-9]+}}, 100(1) +; CHECK: lbz {{[0-9]+}}, 97(1) +; CHECK: lbz {{[0-9]+}}, 98(1) +} diff --git a/test/CodeGen/PowerPC/varargs-struct-float.ll b/test/CodeGen/PowerPC/varargs-struct-float.ll new file mode 100644 index 0000000..fb1835f --- /dev/null +++ b/test/CodeGen/PowerPC/varargs-struct-float.ll @@ -0,0 +1,23 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.Sf1 = type { float } + +define void @foo(float inreg %s.coerce) nounwind { +entry: + %s = alloca %struct.Sf1, align 4 + %coerce.dive = getelementptr %struct.Sf1* %s, i32 0, i32 0 + store float %s.coerce, float* %coerce.dive, align 1 + %coerce.dive1 = getelementptr %struct.Sf1* %s, i32 0, i32 0 + %0 = load float* %coerce.dive1, align 1 + call void (i32, ...)* @testvaSf1(i32 1, float inreg %0) + ret void +} + +; CHECK: stfs {{[0-9]+}}, 60(1) +; CHECK: ld 4, 56(1) +; CHECK: bl + +declare void @testvaSf1(i32, ...) diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll new file mode 100644 index 0000000..3180f46 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -0,0 +1,527 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s + +; Check vector comparisons using AltiVec. For non-native types, only a basic +; comparison-instruction check is done.
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <2 x i8> @v2si8_cmp(<2 x i8> %x, <2 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i8> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i8>
+  ret <2 x i8> %sext
+}
+; CHECK: v2si8_cmp:
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <4 x i8> @v4si8_cmp(<4 x i8> %x, <4 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <4 x i8> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  ret <4 x i8> %sext
+}
+; CHECK: v4si8_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <8 x i8> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i8>
+  ret <8 x i8> %sext
+}
+; CHECK: v8si8_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v16i8 since it is an altivec native type
+
+define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_eq:
+; CHECK: vcmpequb 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_ne:
+; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_le:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_le:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_lt:
+; CHECK: vcmpgtsb 2, 3, 2
+
+define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_lt:
+; CHECK: vcmpgtub 2, 3, 2
+
+define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_gt:
+; CHECK: vcmpgtsb 2, 2, 3
+
+define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_gt:
+; CHECK: vcmpgtub 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_ge:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_ge:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i8> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i8>
+  ret <32 x i8> %sext
+}
+; CHECK: v32si8_cmp:
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x i16> @v2si16_cmp(<2 x i16> %x, <2 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i16> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  ret <2 x i16> %sext
+}
+; CHECK: v2si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <4 x i16> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i16>
+  ret <4 x i16> %sext
+}
+; CHECK: v4si16_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v8i16 since it is an altivec native type
+
+define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp eq <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_eq:
+; CHECK: vcmpequh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_ne:
+; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_le:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_le:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_lt:
+; CHECK: vcmpgtsh 2, 3, 2
+
+define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_lt:
+; CHECK: vcmpgtuh 2, 3, 2
+
+define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_gt:
+; CHECK: vcmpgtsh 2, 2, 3
+
+define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_gt:
+; CHECK: vcmpgtuh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_ge:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_ge:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i16> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i16>
+  ret <16 x i16> %sext
+}
+; CHECK: v16si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <32 x i16> @v32si16_cmp(<32 x i16> %x, <32 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i16> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i16>
+  ret <32 x i16> %sext
+}
+; CHECK: v32si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i32> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %sext
+}
+; CHECK: v2si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v4si32 since it is an altivec native type
+
+define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp eq <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_eq:
+; CHECK: vcmpequw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_ne:
+; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]]
+
+define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_le:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_le:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_lt:
+; CHECK: vcmpgtsw 2, 3, 2
+
+define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_lt:
+; CHECK: vcmpgtuw 2, 3, 2
+
+define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_gt:
+; CHECK: vcmpgtsw 2, 2, 3
+
+define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_gt:
+; CHECK: vcmpgtuw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_ge:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_ge:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <8 x i32> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i32>
+  ret <8 x i32> %sext
+}
+; CHECK: v8si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <16 x i32> @v16si32_cmp(<16 x i32> %x, <16 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i32> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i32>
+  ret <16 x i32> %sext
+}
+; CHECK: v16si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <32 x i32> @v32si32_cmp(<32 x i32> %x, <32 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i32> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i32>
+  ret <32 x i32> %sext
+}
+; CHECK: v32si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x float> @v2f32_cmp(<2 x float> %x, <2 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp oeq <2 x float> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i32>
+  %0 = bitcast <2 x i32> %sext to <2 x float>
+  ret <2 x float> %0
+}
+; CHECK: v2f32_cmp:
+; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v4f32 since it is an altivec native type
+
+define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp oeq <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK: v4f32_cmp_eq:
+; CHECK: vcmpeqfp 2, 2, 3
+
+define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp une <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+ 
ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ne: +; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ole <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_le: +; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp olt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_lt: +; CHECK: vcmpgtfp 2, 3, 2 + +define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ge: +; CHECK: vcmpgefp 2, 2, 3 + +define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ogt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_gt: +; CHECK: vcmpgtfp 2, 2, 3 + + +define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oeq <8 x float> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i32> + %0 = bitcast <8 x i32> %sext to <8 x float> + ret <8 x float> %0 +} +; CHECK: v8f32_cmp: +; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} diff --git a/test/CodeGen/PowerPC/vec_conv.ll b/test/CodeGen/PowerPC/vec_conv.ll new file mode 100644 index 0000000..a475e94 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_conv.ll @@ -0,0 +1,57 @@ +; RUN: llc -mattr=+altivec < %s | FileCheck %s + +; Check vector float/int conversion using altivec. 
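+;
+; A minimal reference for the expected mapping (read off the CHECK lines
+; below rather than from any ISA manual, so treat it as a sketch):
+;   fptosi <4 x float> -> vctsxs   (float to signed i32)
+;   fptoui <4 x float> -> vctuxs   (float to unsigned i32)
+;   sitofp <4 x i32>   -> vcfsx    (signed i32 to float)
+;   uitofp <4 x i32>   -> vcfux    (unsigned i32 to float)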
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@cte_float = global <4 x float> <float 6.5e+00, float 6.5e+00, float 6.5e+00, float 6.5e+00>, align 16
+@cte_int = global <4 x i32> <i32 6, i32 6, i32 6, i32 6>, align 16
+
+
+define void @v4f32_to_v4i32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x float>* @cte_float, align 16
+  %mul = fmul <4 x float> %0, %x
+  %1 = fptosi <4 x float> %mul to <4 x i32>
+  store <4 x i32> %1, <4 x i32>* %y, align 16
+  ret void
+}
+;CHECK: v4f32_to_v4i32:
+;CHECK: vctsxs {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4f32_to_v4u32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x float>* @cte_float, align 16
+  %mul = fmul <4 x float> %0, %x
+  %1 = fptoui <4 x float> %mul to <4 x i32>
+  store <4 x i32> %1, <4 x i32>* %y, align 16
+  ret void
+}
+;CHECK: v4f32_to_v4u32:
+;CHECK: vctuxs {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4i32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x i32>* @cte_int, align 16
+  %mul = mul <4 x i32> %0, %x
+  %1 = sitofp <4 x i32> %mul to <4 x float>
+  store <4 x float> %1, <4 x float>* %y, align 16
+  ret void
+}
+;CHECK: v4i32_to_v4f32:
+;CHECK: vcfsx {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4u32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x i32>* @cte_int, align 16
+  %mul = mul <4 x i32> %0, %x
+  %1 = uitofp <4 x i32> %mul to <4 x float>
+  store <4 x float> %1, <4 x float>* %y, align 16
+  ret void
+}
+;CHECK: v4u32_to_v4f32:
+;CHECK: vcfux {{[0-9]+}}, {{[0-9]+}}, 0
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
new file mode 100644
index 0000000..201c15b
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -0,0 +1,155 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check vector extend load expansion with altivec enabled.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Altivec does not provide a sext instruction, so sign extension is expanded
+; into a sequence of vector stores (stvx), per-element loads, sign extensions,
+; and stores (lbz/stb), and a final vector load (lvx) of the extended result.
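+;
+; Roughly, the per-element shape expected by the checks below is (a sketch
+; assembled from those checks; register numbers and offsets are illustrative
+; only, and the in-GPR extension step is not checked explicitly):
+;   stvx 2, 0, rN     ; spill the vector to the stack
+;   lbz  rT, off(1)   ; load one element
+;   ...               ; sign-extend the low bits in a GPR
+;   stb  rT, off(1)   ; store it back
+;   ...               ; repeated for each of the 16 elements
+;   lvx  2, 0, rN     ; reload the fully extended vector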
+define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = sext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK: v16si8_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; The zero extend uses cleverer logic: a vector splat and a logical AND to
+; set the high bits to 0.
+define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = zext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK: v16si8_zext_in_reg:
+; CHECK: vspltisb [[VMASK:[0-9]+]], 15
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth).
+define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = sext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK: v8si16_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same idea as v16si8_zext_in_reg, but instead of creating the mask with a
+; splat, it is loaded from memory.
+define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = zext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK: v8si16_zext_in_reg:
+; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
+; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and word
+; stores (stw).
+define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = sext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK: v4si32_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same idea as v16si8_zext_in_reg: the mask is built with splats
+; (vspltisw/vsrw) and applied with vand.
+define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = zext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK: v4si32_zext_in_reg:
+; CHECK: vspltisw [[VMASK:[0-9]+]], -16
+; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
diff --git a/test/CodeGen/PowerPC/vec_sqrt.ll b/test/CodeGen/PowerPC/vec_sqrt.ll
new file mode 100644
index 0000000..055da1a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_sqrt.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < %s | FileCheck %s
+
+; Check that vector sqrt is expanded into scalar floating-point sqrts, since
+; altivec does not provide a vector fsqrt instruction.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %val)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float> %val)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double> %val)
+
+define <2 x float> @v2f32_sqrt(<2 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x float> @llvm.sqrt.v2f32 (<2 x float> %x)
+  ret <2 x float> %sqrt
+}
+; sqrt (<2 x float>) is promoted to sqrt (<4 x float>)
+; CHECK: v2f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_sqrt(<4 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %x)
+  ret <4 x float> %sqrt
+}
+; CHECK: v4f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_sqrt(<8 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %x)
+  ret <8 x float> %sqrt
+}
+; CHECK: v8f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_sqrt(<2 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %x)
+  ret <2 x double> %sqrt
+}
+; CHECK: v2f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_sqrt(<4 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %x)
+  ret <4 x double> %sqrt
+}
+; CHECK: v4f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
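+
+; To reproduce these checks by hand, one can mirror the RUN line above
+; (a hypothetical invocation; any llc/FileCheck built from this tree should do):
+;   llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < vec_sqrt.ll | FileCheck vec_sqrt.ll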
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
new file mode 100644
index 0000000..7641017
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+
+; This verifies that we generate correct spill/reload code for vector regs.
+
+define void @addrtaken(i32 %i, <4 x float> %w) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  %w.addr = alloca <4 x float>, align 16
+  store i32 %i, i32* %i.addr, align 4
+  store <4 x float> %w, <4 x float>* %w.addr, align 16
+  call void @foo(i32* %i.addr)
+  ret void
+}
+
+; CHECK: stvx 2, 0, 0
+; CHECK: lvx 2, 0, 0
+
+declare void @foo(i32*)