Diffstat (limited to 'test/CodeGen/NVPTX')
 test/CodeGen/NVPTX/annotations.ll        |  55
 test/CodeGen/NVPTX/arithmetic-fp-sm10.ll |  72
 test/CodeGen/NVPTX/arithmetic-fp-sm20.ll |  72
 test/CodeGen/NVPTX/arithmetic-int.ll     | 295
 test/CodeGen/NVPTX/calling-conv.ll       |  32
 test/CodeGen/NVPTX/compare-int.ll        | 389
 test/CodeGen/NVPTX/convert-fp.ll         | 146
 test/CodeGen/NVPTX/convert-int-sm10.ll   |  55
 test/CodeGen/NVPTX/convert-int-sm20.ll   |  64
 test/CodeGen/NVPTX/fma-disable.ll        |  24
 test/CodeGen/NVPTX/fma.ll                |  17
 test/CodeGen/NVPTX/intrinsic-old.ll      | 284
 test/CodeGen/NVPTX/intrinsics.ll         |  21
 test/CodeGen/NVPTX/ld-addrspace.ll       | 173
 test/CodeGen/NVPTX/ld-generic.ll         |  63
 test/CodeGen/NVPTX/lit.local.cfg         |   5
 test/CodeGen/NVPTX/simple-call.ll        |  26
 test/CodeGen/NVPTX/st-addrspace.ll       | 179
 test/CodeGen/NVPTX/st-generic.ll         |  69
19 files changed, 2041 insertions, 0 deletions
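
All nineteen files follow the same pattern: each RUN line pipes llc output into FileCheck over the same file. Outside of lit, a single test can be exercised by hand — a sketch, assuming an LLVM build directory on PATH:

    llc < test/CodeGen/NVPTX/annotations.ll -march=nvptx -mcpu=sm_10 \
      | FileCheck test/CodeGen/NVPTX/annotations.ll
    llvm-lit -v test/CodeGen/NVPTX    # or run the whole directory under lit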
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
new file mode 100644
index 0000000..d93f688
--- /dev/null
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+@texture = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .texref texture
+@surface = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .surfref surface
+
+
+; CHECK: .entry kernel_func_maxntid
+define void @kernel_func_maxntid(float* %a) {
+; CHECK: .maxntid 10, 20, 30
+; CHECK: ret
+  ret void
+}
+
+; CHECK: .entry kernel_func_reqntid
+define void @kernel_func_reqntid(float* %a) {
+; CHECK: .reqntid 11, 22, 33
+; CHECK: ret
+  ret void
+}
+
+; CHECK: .entry kernel_func_minctasm
+define void @kernel_func_minctasm(float* %a) {
+; CHECK: .minnctapersm 42
+; CHECK: ret
+  ret void
+}
+
+
+
+!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
+
+!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
+!2 = metadata !{void (float*)* @kernel_func_maxntid,
+                metadata !"maxntidx", i32 10,
+                metadata !"maxntidy", i32 20,
+                metadata !"maxntidz", i32 30}
+
+!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
+!4 = metadata !{void (float*)* @kernel_func_reqntid,
+                metadata !"reqntidx", i32 11,
+                metadata !"reqntidy", i32 22,
+                metadata !"reqntidz", i32 33}
+
+!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
+!6 = metadata !{void (float*)* @kernel_func_minctasm,
+                metadata !"minctasm", i32 42}
+
+!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
+!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
new file mode 100644
index 0000000..73c77f5
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd double %a, %b
+  ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub double %a, %b
+  ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul double %a, %b
+  ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv double %a, %b
+  ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd float %a, %b
+  ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub float %a, %b
+  ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul float %a, %b
+  ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv float %a, %b
+  ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
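
Note the one behavioral difference between this file and the otherwise identical sm_20 copy that follows: f32 division. On sm_10 the test expects div.full.f32 (PTX's full-range approximate divide, not fully IEEE compliant), while the sm_20 file expects the IEEE round-to-nearest div.rn.f32; f64 division is div.rn on both. The same IR is the pivot — a sketch:

    ; identical IR, different expected PTX depending on -mcpu:
    define float @fdiv_example(float %a, float %b) {
      %q = fdiv float %a, %b   ; sm_10: div.full.f32 / sm_20: div.rn.f32
      ret float %q
    }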
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
new file mode 100644
index 0000000..e474fa4
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd double %a, %b
+  ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub double %a, %b
+  ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul double %a, %b
+  ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv double %a, %b
+  ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fadd float %a, %b
+  ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fsub float %a, %b
+  ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fmul float %a, %b
+  ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+  %ret = fdiv float %a, %b
+  ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
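
The CHECK patterns throughout this patch encode the backend's virtual register classes in the register prefix. A key, as inferred from these tests (comment-only, so it is still valid IR input):

    ; %p   predicate      %rc  8-bit integer
    ; %rs  16-bit int     %r   32-bit int      %rl  64-bit int
    ; %f   32-bit float   %fl  64-bit float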
diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll
new file mode 100644
index 0000000..529f849
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @add_i64(i64 %a, i64 %b) {
+; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = add i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @sub_i64(i64 %a, i64 %b) {
+; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @mul_i64(i64 %a, i64 %b) {
+; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @sdiv_i64(i64 %a, i64 %b) {
+; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @udiv_i64(i64 %a, i64 %b) {
+; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @srem_i64(i64 %a, i64 %b) {
+; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @urem_i64(i64 %a, i64 %b) {
+; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @and_i64(i64 %a, i64 %b) {
+; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = and i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @or_i64(i64 %a, i64 %b) {
+; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = or i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @xor_i64(i64 %a, i64 %b) {
+; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @shl_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @ashr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i64 %a, %b
+  ret i64 %ret
+}
+
+define i64 @lshr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i64 %a, %b
+  ret i64 %ret
+}
+
+
+;;; i32
+
+define i32 @add_i32(i32 %a, i32 %b) {
+; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = add i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @sub_i32(i32 %a, i32 %b) {
+; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @mul_i32(i32 %a, i32 %b) {
+; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @sdiv_i32(i32 %a, i32 %b) {
+; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @udiv_i32(i32 %a, i32 %b) {
+; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @srem_i32(i32 %a, i32 %b) {
+; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @urem_i32(i32 %a, i32 %b) {
+; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @and_i32(i32 %a, i32 %b) {
+; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = and i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @or_i32(i32 %a, i32 %b) {
+; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = or i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @xor_i32(i32 %a, i32 %b) {
+; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @shl_i32(i32 %a, i32 %b) {
+; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @ashr_i32(i32 %a, i32 %b) {
+; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i32 %a, %b
+  ret i32 %ret
+}
+
+define i32 @lshr_i32(i32 %a, i32 %b) {
+; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i32 %a, %b
+  ret i32 %ret
+}
+
+;;; i16
+
+define i16 @add_i16(i16 %a, i16 %b) {
+; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = add i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @sub_i16(i16 %a, i16 %b) {
+; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = sub i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @mul_i16(i16 %a, i16 %b) {
+; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = mul i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @sdiv_i16(i16 %a, i16 %b) {
+; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = sdiv i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @udiv_i16(i16 %a, i16 %b) {
+; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = udiv i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @srem_i16(i16 %a, i16 %b) {
+; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = srem i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @urem_i16(i16 %a, i16 %b) {
+; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = urem i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @and_i16(i16 %a, i16 %b) {
+; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = and i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @or_i16(i16 %a, i16 %b) {
+; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = or i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @xor_i16(i16 %a, i16 %b) {
+; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %ret = xor i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @shl_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = shl i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @ashr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = ashr i16 %a, %b
+  ret i16 %ret
+}
+
+define i16 @lshr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %ret = lshr i16 %a, %b
+  ret i16 %ret
+}
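
The shift tests above are the one place where operand register classes intentionally disagree: PTX takes the shift amount as a 32-bit value, so for shl/ashr/lshr on i16 and i64 the CHECK lines pair %rs/%rl data registers with an %r amount. A minimal sketch of the i64 case, assuming a constant amount may also be emitted as an immediate:

    define i64 @shl_by_5(i64 %a) {
      ; amount operand is 32-bit (or an immediate); data stays 64-bit
      %ret = shl i64 %a, 5
      ret i64 %ret
    }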
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
new file mode 100644
index 0000000..968203e
--- /dev/null
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Kernel function using ptx_kernel calling conv
+
+; CHECK: .entry kernel_func
+define ptx_kernel void @kernel_func(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+;; Device function
+; CHECK: .func device_func
+define void @device_func(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+;; Kernel function using NVVM metadata
+; CHECK: .entry metadata_kernel
+define void @metadata_kernel(float* %a) {
+; CHECK: ret
+  ret void
+}
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
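
calling-conv.ll pins down that a function becomes a .entry either through the ptx_kernel calling convention or through !nvvm.annotations metadata; anything unmarked lowers to a plain .func. The metadata spelling (old-style metadata syntax, as used throughout this patch) in isolation:

    define void @my_kernel(float* %a) {
      ret void
    }

    !nvvm.annotations = !{!0}
    !0 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}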
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
new file mode 100644
index 0000000..12fc754
--- /dev/null
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -0,0 +1,389 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @icmp_eq_i64(i64 %a, i64 %b) {
+; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ne_i64(i64 %a, i64 %b) {
+; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_uge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ult_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_ule_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_slt_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+define i64 @icmp_sle_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i64 %a, %b
+  %ret = zext i1 %cmp to i64
+  ret i64 %ret
+}
+
+;;; i32
+
+define i32 @icmp_eq_i32(i32 %a, i32 %b) {
+; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp eq i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ne_i32(i32 %a, i32 %b) {
+; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ne i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ugt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_uge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp uge i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ult_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ult i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_ule_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp ule i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sgt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sge i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_slt_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp slt i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}
+
+define i32 @icmp_sle_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+  %cmp = icmp sle i32 %a, %b
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+}

;;; i16

define i16 @icmp_eq_i16(i16 %a, i16 %b) {
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp eq i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_ne_i16(i16 %a, i16 %b) {
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ne i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ugt i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_uge_i16(i16 %a, i16 %b) {
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp uge i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_ult_i16(i16 %a, i16 %b) {
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ult i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_ule_i16(i16 %a, i16 %b) {
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ule i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sgt i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_sge_i16(i16 %a, i16 %b) {
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sge i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_slt_i16(i16 %a, i16 %b) {
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp slt i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

define i16 @icmp_sle_i16(i16 %a, i16 %b) {
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sle i16 %a, %b
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}


;;; i8

define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp eq i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ne i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ugt i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_uge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp uge i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_ult_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ult i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_ule_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp ule i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sgt i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_sge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sge i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_slt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp slt i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}

define i8 @icmp_sle_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
  %cmp = icmp sle i8 %a, %b
  %ret = zext i1 %cmp to i8
  ret i8 %ret
}
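
Every compare test above leans on FileCheck variable captures: [[P0:[0-9]+]] binds the predicate register number at the setp line, and the bare [[P0]] on the selp line then requires the select to read that same predicate. The idiom in isolation:

    ; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], {{.*}}
    ; CHECK: selp.u32 {{.*}}, 1, 0, %p[[P0]]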
diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll
new file mode 100644
index 0000000..21c8437
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-fp.ll
@@ -0,0 +1,146 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+define i16 @cvt_i16_f32(float %x) {
+; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_f64(double %x) {
+; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i16
+  ret i16 %a
+}
+
+define i32 @cvt_i32_f32(float %x) {
+; CHECK: cvt.rzi.u32.f32 %r{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_f64(double %x) {
+; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i32
+  ret i32 %a
+}
+
+
+define i64 @cvt_i64_f32(float %x) {
+; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui float %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_f64(double %x) {
+; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptoui double %x to i64
+  ret i64 %a
+}
+
+define float @cvt_f32_i16(i16 %x) {
+; CHECK: cvt.rn.f32.u16 %f{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i16 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_i32(i32 %x) {
+; CHECK: cvt.rn.f32.u32 %f{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i32 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_i64(i64 %x) {
+; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i64 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_f64(double %x) {
+; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fptrunc double %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s16(i16 %x) {
+; CHECK: cvt.rn.f32.s16 %f{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i16 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s32(i32 %x) {
+; CHECK: cvt.rn.f32.s32 %f{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i32 %x to float
+  ret float %a
+}
+
+define float @cvt_f32_s64(i64 %x) {
+; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i64 %x to float
+  ret float %a
+}
+
+define double @cvt_f64_i16(i16 %x) {
+; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i16 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_i32(i32 %x) {
+; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i32 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_i64(i64 %x) {
+; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+  %a = uitofp i64 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_f32(float %x) {
+; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fpext float %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s16(i16 %x) {
+; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i16 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s32(i32 %x) {
+; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i32 %x to double
+  ret double %a
+}
+
+define double @cvt_f64_s64(i64 %x) {
+; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = sitofp i64 %x to double
+  ret double %a
+}
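
convert-fp.ll also documents the rounding-suffix conventions: float-to-int lowers to cvt.rzi (round toward zero, matching fptoui semantics), int-to-float to cvt.rn (round to nearest even), and the widening f32-to-f64 cvt carries no rounding suffix because that conversion is exact. Condensed into one sketch:

    define double @cvt_chain(i32 %x) {
      %f = uitofp i32 %x to float     ; cvt.rn.f32.u32
      %d = fpext float %f to double   ; cvt.f64.f32 (exact, no suffix)
      ret double %d
    }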
diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll
new file mode 100644
index 0000000..20716f9
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm10.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i32 %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i64 %x to i16
+  ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = zext i16 %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+  %a = trunc i64 %x to i32
+  ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+  %a = zext i16 %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+  %a = zext i32 %x to i64
+  ret i64 %a
+}
diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll
new file mode 100644
index 0000000..fad240e
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Integer conversions happen implicitly by loading/storing the proper types
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+  %a = trunc i32 %x to i16
+  ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+  %a = trunc i64 %x to i16
+  ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+  %a = zext i16 %x to i32
+  ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+  %a = trunc i64 %x to i32
+  ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+  %a = zext i16 %x to i64
+  ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+  %a = zext i32 %x to i64
+  ret i64 %a
+}
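
The two convert-int files check the same IR against different lowering strategies: on sm_10 a trunc or zext becomes an explicit cvt between register classes, while on sm_20 the conversion folds into the widths of the parameter load and return store, so no cvt appears at all. Side by side for trunc i32 to i16, using the tests' own patterns:

    ; sm_10: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
    ; sm_20: ld.param.u16 %rs{{[0-9]+}}, [cvt_i16_i32_param_{{[0-9]+}}]
    ;        st.param.b16 [func_retval{{[0-9]+}}+0], %rs{{[0-9]+}}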
diff --git a/test/CodeGen/NVPTX/fma-disable.ll b/test/CodeGen/NVPTX/fma-disable.ll
new file mode 100644
index 0000000..bdd7401
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma-disable.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+; FMA: fma.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+; FMA: fma.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll
new file mode 100644
index 0000000..4ef1a9a
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  ret float %b
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  ret double %b
+}
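
fma.ll and fma-disable.ll share the same mul-then-add IR; the only variable is the -nvptx-fma-level flag, which gates contraction of the pair into fma.rn. To reproduce the MUL-prefixed run by hand, mirroring the RUN lines above:

    llc < test/CodeGen/NVPTX/fma-disable.ll -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 \
      | FileCheck test/CodeGen/NVPTX/fma-disable.ll -check-prefix=MUL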
diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
new file mode 100644
index 0000000..1c9879c
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -0,0 +1,284 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define ptx_device i32 @test_tid_x() {
+; CHECK: mov.u32 %r0, %tid.x;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.tid.x()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_tid_y() {
+; CHECK: mov.u32 %r0, %tid.y;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.tid.y()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_tid_z() {
+; CHECK: mov.u32 %r0, %tid.z;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.tid.z()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_tid_w() {
+; CHECK: mov.u32 %r0, %tid.w;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.tid.w()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_x() {
+; CHECK: mov.u32 %r0, %ntid.x;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ntid.x()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_y() {
+; CHECK: mov.u32 %r0, %ntid.y;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ntid.y()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_z() {
+; CHECK: mov.u32 %r0, %ntid.z;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ntid.z()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_w() {
+; CHECK: mov.u32 %r0, %ntid.w;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ntid.w()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_laneid() {
+; CHECK: mov.u32 %r0, %laneid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.laneid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_warpid() {
+; CHECK: mov.u32 %r0, %warpid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.warpid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nwarpid() {
+; CHECK: mov.u32 %r0, %nwarpid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nwarpid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_x() {
+; CHECK: mov.u32 %r0, %ctaid.x;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ctaid.x()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_y() {
+; CHECK: mov.u32 %r0, %ctaid.y;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ctaid.y()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_z() {
+; CHECK: mov.u32 %r0, %ctaid.z;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ctaid.z()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_w() {
+; CHECK: mov.u32 %r0, %ctaid.w;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.ctaid.w()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_x() {
+; CHECK: mov.u32 %r0, %nctaid.x;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nctaid.x()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_y() {
+; CHECK: mov.u32 %r0, %nctaid.y;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nctaid.y()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_z() {
+; CHECK: mov.u32 %r0, %nctaid.z;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nctaid.z()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_w() {
+; CHECK: mov.u32 %r0, %nctaid.w;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nctaid.w()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_smid() {
+; CHECK: mov.u32 %r0, %smid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.smid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_nsmid() {
+; CHECK: mov.u32 %r0, %nsmid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.nsmid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_gridid() {
+; CHECK: mov.u32 %r0, %gridid;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.gridid()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_eq() {
+; CHECK: mov.u32 %r0, %lanemask_eq;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.lanemask.eq()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_le() {
+; CHECK: mov.u32 %r0, %lanemask_le;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.lanemask.le()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_lt() {
+; CHECK: mov.u32 %r0, %lanemask_lt;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.lanemask.lt()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_ge() {
+; CHECK: mov.u32 %r0, %lanemask_ge;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.lanemask.ge()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_gt() {
+; CHECK: mov.u32 %r0, %lanemask_gt;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.lanemask.gt()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_clock() {
+; CHECK: mov.u32 %r0, %clock;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.clock()
+  ret i32 %x
+}
+
+define ptx_device i64 @test_clock64() {
+; CHECK: mov.u64 %rl0, %clock64;
+; CHECK: ret;
+  %x = call i64 @llvm.ptx.read.clock64()
+  ret i64 %x
+}
+
+define ptx_device i32 @test_pm0() {
+; CHECK: mov.u32 %r0, %pm0;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.pm0()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_pm1() {
+; CHECK: mov.u32 %r0, %pm1;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.pm1()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_pm2() {
+; CHECK: mov.u32 %r0, %pm2;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.pm2()
+  ret i32 %x
+}
+
+define ptx_device i32 @test_pm3() {
+; CHECK: mov.u32 %r0, %pm3;
+; CHECK: ret;
+  %x = call i32 @llvm.ptx.read.pm3()
+  ret i32 %x
+}
+
+define ptx_device void @test_bar_sync() {
+; CHECK: bar.sync 0
+; CHECK: ret;
+  call void @llvm.ptx.bar.sync(i32 0)
+  ret void
+}
+
+declare i32 @llvm.ptx.read.tid.x()
+declare i32 @llvm.ptx.read.tid.y()
+declare i32 @llvm.ptx.read.tid.z()
+declare i32 @llvm.ptx.read.tid.w()
+declare i32 @llvm.ptx.read.ntid.x()
+declare i32 @llvm.ptx.read.ntid.y()
+declare i32 @llvm.ptx.read.ntid.z()
+declare i32 @llvm.ptx.read.ntid.w()
+
+declare i32 @llvm.ptx.read.laneid()
+declare i32 @llvm.ptx.read.warpid()
+declare i32 @llvm.ptx.read.nwarpid()
+
+declare i32 @llvm.ptx.read.ctaid.x()
+declare i32 @llvm.ptx.read.ctaid.y()
+declare i32 @llvm.ptx.read.ctaid.z()
+declare i32 @llvm.ptx.read.ctaid.w()
+declare i32 @llvm.ptx.read.nctaid.x()
+declare i32 @llvm.ptx.read.nctaid.y()
+declare i32 @llvm.ptx.read.nctaid.z()
+declare i32 @llvm.ptx.read.nctaid.w()
+
+declare i32 @llvm.ptx.read.smid()
+declare i32 @llvm.ptx.read.nsmid()
+declare i32 @llvm.ptx.read.gridid()
+
+declare i32 @llvm.ptx.read.lanemask.eq()
+declare i32 @llvm.ptx.read.lanemask.le()
+declare i32 @llvm.ptx.read.lanemask.lt()
+declare i32 @llvm.ptx.read.lanemask.ge()
+declare i32 @llvm.ptx.read.lanemask.gt()
+
+declare i32 @llvm.ptx.read.clock()
+declare i64 @llvm.ptx.read.clock64()
+
+declare i32 @llvm.ptx.read.pm0()
+declare i32 @llvm.ptx.read.pm1()
+declare i32 @llvm.ptx.read.pm2()
+declare i32 @llvm.ptx.read.pm3()
+
+declare void @llvm.ptx.bar.sync(i32 %i)
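
Each of these legacy llvm.ptx.read.* intrinsics lowers to a single mov from the corresponding PTX special register. A hedged example of how they compose in practice, assuming the usual CUDA-style global index; this helper is illustrative and not part of the patch:

    define ptx_device i32 @global_thread_id_x() {
      %tid   = call i32 @llvm.ptx.read.tid.x()    ; thread index within the block
      %ntid  = call i32 @llvm.ptx.read.ntid.x()   ; block size
      %ctaid = call i32 @llvm.ptx.read.ctaid.x()  ; block index within the grid
      %base  = mul i32 %ctaid, %ntid
      %gid   = add i32 %base, %tid
      ret i32 %gid
    }

    declare i32 @llvm.ptx.read.tid.x()
    declare i32 @llvm.ptx.read.ntid.x()
    declare i32 @llvm.ptx.read.ctaid.x()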
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
new file mode 100644
index 0000000..afab60c
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @test_fabsf(float %f) {
+; CHECK: abs.f32 %f0, %f0;
+; CHECK: ret;
+  %x = call float @llvm.fabs.f32(float %f)
+  ret float %x
+}
+
+define ptx_device double @test_fabs(double %d) {
+; CHECK: abs.f64 %fl0, %fl0;
+; CHECK: ret;
+  %x = call double @llvm.fabs.f64(double %d)
+  ret double %x
+}
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
new file mode 100644
index 0000000..d1f5093d
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -0,0 +1,173 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
+; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(1)* %ptr
+  ret i8 %a
+}
+
+define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(3)* %ptr
+  ret i8 %a
+}
+
+define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
+; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(5)* %ptr
+  ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
+; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(1)* %ptr
+  ret i16 %a
+}
+
+define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(3)* %ptr
+  ret i16 %a
+}
+
+define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
+; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(5)* %ptr
+  ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
+; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(1)* %ptr
+  ret i32 %a
+}
+
+define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(3)* %ptr
+  ret i32 %a
+}
+
+define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
+; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(5)* %ptr
+  ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
+; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(1)* %ptr
+  ret i64 %a
+}
+
+define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(3)* %ptr
+  ret i64 %a
+}
+
+define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
+; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(5)* %ptr
+  ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(1)* %ptr) {
+; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(1)* %ptr
+  ret float %a
+}
+
+define float @ld_shared_f32(float addrspace(3)* %ptr) {
+; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(3)* %ptr
+  ret float %a
+}
+
+define float @ld_local_f32(float addrspace(5)* %ptr) {
+; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(5)* %ptr
+  ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(1)* %ptr) {
+; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(1)* %ptr
+  ret double %a
+}
+
+define double @ld_shared_f64(double addrspace(3)* %ptr) {
+; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(3)* %ptr
+  ret double %a
+}
+
+define double @ld_local_f64(double addrspace(5)* %ptr) {
+; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(5)* %ptr
+  ret double %a
+}
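
The address-space mapping these tests rely on: addrspace(1) is global (ld.global), addrspace(3) is shared (ld.shared), addrspace(5) is local (ld.local), and addrspace(0), exercised by the generic-load file that follows, lowers to an unqualified ld. A mixed sketch in the same (old typeless-load) syntax:

    define float @mix(float addrspace(1)* %g, float addrspace(3)* %s) {
      %x = load float addrspace(1)* %g   ; ld.global.f32
      %y = load float addrspace(3)* %s   ; ld.shared.f32
      %sum = fadd float %x, %y
      ret float %sum
    }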
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
new file mode 100644
index 0000000..81a5216
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i8 addrspace(0)* %ptr
+  ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
+; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i16 addrspace(0)* %ptr
+  ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
+; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i32 addrspace(0)* %ptr
+  ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
+; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load i64 addrspace(0)* %ptr
+  ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(0)* %ptr) {
+; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load float addrspace(0)* %ptr
+  ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(0)* %ptr) {
+; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+  %a = load double addrspace(0)* %ptr
+  ret double %a
+}
diff --git a/test/CodeGen/NVPTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg
new file mode 100644
index 0000000..7180c84
--- /dev/null
+++ b/test/CodeGen/NVPTX/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'NVPTX' in targets:
+    config.unsupported = True
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
new file mode 100644
index 0000000..ab6f423
--- /dev/null
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+
+; CHECK: .func ({{.*}}) device_func
+define float @device_func(float %a) noinline {
+  %ret = fmul float %a, %a
+  ret float %ret
+}
+
+; CHECK: .entry kernel_func
+define void @kernel_func(float* %a) {
+  %val = load float* %a
+; CHECK: call.uni (retval0),
+; CHECK: device_func,
+  %mul = call float @device_func(float %val)
+  store float %mul, float* %a
+  ret void
+}
+
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
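
lit.local.cfg above keeps the whole directory marked unsupported unless the NVPTX target was built, so none of these tests run on a build without it. A sketch of opting in at configure time; LLVM_TARGETS_TO_BUILD is the standard CMake variable, and the exact target list here is only an example:

    cmake -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" ../llvm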
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
new file mode 100644
index 0000000..54e04ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
+; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
+; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
+; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(5)* %ptr
+  ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
+; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
+; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
+; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(5)* %ptr
+  ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
+; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
+; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
+; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(5)* %ptr
+  ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
+; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
+; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
+; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(5)* %ptr
+  ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
+; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(1)* %ptr
+  ret void
+}
+
+define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
+; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(3)* %ptr
+  ret void
+}
+
+define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
+; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(5)* %ptr
+  ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
+; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(1)* %ptr
+  ret void
+}

define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
  store double %a, double addrspace(3)* %ptr
  ret void
}

define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
  store double %a, double addrspace(5)* %ptr
  ret void
}
diff --git a/test/CodeGen/NVPTX/st-generic.ll b/test/CodeGen/NVPTX/st-generic.ll
new file mode 100644
index 0000000..59a1fe0
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-generic.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
+; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+  store i8 %a, i8 addrspace(0)* %ptr
+  ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
+; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+  store i16 %a, i16 addrspace(0)* %ptr
+  ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
+; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+  store i32 %a, i32 addrspace(0)* %ptr
+  ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
+; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+  store i64 %a, i64 addrspace(0)* %ptr
+  ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
+; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+  store float %a, float addrspace(0)* %ptr
+  ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
+; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+  store double %a, double addrspace(0)* %ptr
+  ret void
+}