diff options
author | dim <dim@FreeBSD.org> | 2015-05-27 18:47:56 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-05-27 18:47:56 +0000 |
commit | 3191b2b32a96e1a6ee833fcca73e5c8e0c67ba65 (patch) | |
tree | dbbd4047878da71c1a706e26ce05b4e7791b14cc /test/CodeGenCUDA | |
parent | 38d6f2e7f2ce51a5b3836d26596c6c34a3288752 (diff) | |
download | FreeBSD-src-3191b2b32a96e1a6ee833fcca73e5c8e0c67ba65.zip FreeBSD-src-3191b2b32a96e1a6ee833fcca73e5c8e0c67ba65.tar.gz |
Vendor import of clang trunk r238337:
https://llvm.org/svn/llvm-project/cfe/trunk@238337
Diffstat (limited to 'test/CodeGenCUDA')
-rw-r--r-- | test/CodeGenCUDA/address-spaces.cu | 31 | ||||
-rw-r--r-- | test/CodeGenCUDA/cuda-builtin-vars.cu | 28 | ||||
-rw-r--r-- | test/CodeGenCUDA/device-stub.cu | 41 | ||||
-rw-r--r-- | test/CodeGenCUDA/filter-decl.cu | 6 | ||||
-rw-r--r-- | test/CodeGenCUDA/host-device-calls-host.cu | 32 | ||||
-rw-r--r-- | test/CodeGenCUDA/launch-bounds.cu | 51 |
6 files changed, 181 insertions, 8 deletions
diff --git a/test/CodeGenCUDA/address-spaces.cu b/test/CodeGenCUDA/address-spaces.cu index b808206..b0ef355 100644 --- a/test/CodeGenCUDA/address-spaces.cu +++ b/test/CodeGenCUDA/address-spaces.cu @@ -27,25 +27,25 @@ struct MyStruct { // CHECK: @b = addrspace(3) global float 0.000000e+00 __device__ void foo() { - // CHECK: load i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) i++; - // CHECK: load i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) j++; - // CHECK: load i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) k++; static int li; - // CHECK: load i32* addrspacecast (i32 addrspace(1)* @_ZZ3foovE2li to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @_ZZ3foovE2li to i32*) li++; __constant__ int lj; - // CHECK: load i32* addrspacecast (i32 addrspace(4)* @_ZZ3foovE2lj to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @_ZZ3foovE2lj to i32*) lj++; __shared__ int lk; - // CHECK: load i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) lk++; } @@ -75,7 +75,7 @@ __device__ void func2() { *ap = 1.0f; } // CHECK: define void @_Z5func2v() -// CHECK: store float* getelementptr inbounds ([256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap +// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap __device__ void func3() { __shared__ float a; @@ -100,3 +100,20 @@ __device__ float *func5() { } // CHECK: define float* @_Z5func5v() // CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*) + +struct StructWithCtor { + __device__ StructWithCtor(): data(1) {} + __device__ StructWithCtor(const StructWithCtor &second): data(second.data) {} + __device__ int getData() { return data; } + int data; +}; + +__device__ int construct_shared_struct() { +// CHECK-LABEL: define i32 @_Z23construct_shared_structv() + __shared__ StructWithCtor s; +// CHECK: call void @_ZN14StructWithCtorC1Ev(%struct.StructWithCtor* addrspacecast (%struct.StructWithCtor addrspace(3)* @_ZZ23construct_shared_structvE1s to %struct.StructWithCtor*)) + __shared__ StructWithCtor t(s); +// CHECK: call void @_ZN14StructWithCtorC1ERKS_(%struct.StructWithCtor* addrspacecast (%struct.StructWithCtor addrspace(3)* @_ZZ23construct_shared_structvE1t to %struct.StructWithCtor*), %struct.StructWithCtor* dereferenceable(4) addrspacecast (%struct.StructWithCtor addrspace(3)* @_ZZ23construct_shared_structvE1s to %struct.StructWithCtor*)) + return t.getData(); +// CHECK: call i32 @_ZN14StructWithCtor7getDataEv(%struct.StructWithCtor* addrspacecast (%struct.StructWithCtor addrspace(3)* @_ZZ23construct_shared_structvE1t to %struct.StructWithCtor*)) +} diff --git a/test/CodeGenCUDA/cuda-builtin-vars.cu b/test/CodeGenCUDA/cuda-builtin-vars.cu new file mode 100644 index 0000000..834e16d --- /dev/null +++ b/test/CodeGenCUDA/cuda-builtin-vars.cu @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#include "cuda_builtin_vars.h" + +// CHECK: define void @_Z6kernelPi(i32* %out) +__attribute__((global)) +void kernel(int *out) { + int i = 0; + out[i++] = threadIdx.x; // CHECK: call i32 @llvm.ptx.read.tid.x() + out[i++] = threadIdx.y; // CHECK: call i32 @llvm.ptx.read.tid.y() + out[i++] = threadIdx.z; // CHECK: call i32 @llvm.ptx.read.tid.z() + + out[i++] = blockIdx.x; // CHECK: call i32 @llvm.ptx.read.ctaid.x() + out[i++] = blockIdx.y; // CHECK: call i32 @llvm.ptx.read.ctaid.y() + out[i++] = blockIdx.z; // CHECK: call i32 @llvm.ptx.read.ctaid.z() + + out[i++] = blockDim.x; // CHECK: call i32 @llvm.ptx.read.ntid.x() + out[i++] = blockDim.y; // CHECK: call i32 @llvm.ptx.read.ntid.y() + out[i++] = blockDim.z; // CHECK: call i32 @llvm.ptx.read.ntid.z() + + out[i++] = gridDim.x; // CHECK: call i32 @llvm.ptx.read.nctaid.x() + out[i++] = gridDim.y; // CHECK: call i32 @llvm.ptx.read.nctaid.y() + out[i++] = gridDim.z; // CHECK: call i32 @llvm.ptx.read.nctaid.z() + + out[i++] = warpSize; // CHECK: store i32 32, + + // CHECK: ret void +} diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu index ed94d10..7f5e159 100644 --- a/test/CodeGenCUDA/device-stub.cu +++ b/test/CodeGenCUDA/device-stub.cu @@ -1,7 +1,21 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s #include "Inputs/cuda.h" +// Make sure that all parts of GPU code init/cleanup are there: +// * constant unnamed string with the kernel name +// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" +// * constant unnamed string with GPU binary +// CHECK: private unnamed_addr constant{{.*}}\00" +// * constant struct that wraps GPU binary +// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } +// CHECK: { i32 1180844977, i32 1, {{.*}}, i8* null } +// * variable to save GPU binary handle after initialization +// CHECK: @__cuda_gpubin_handle = internal global i8** null +// * Make sure our constructor/destructor was added to global ctor/dtor list. +// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor +// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor + // Test that we build the correct number of calls to cudaSetupArgument followed // by a call to cudaLaunch. @@ -11,3 +25,28 @@ // CHECK: call{{.*}}cudaSetupArgument // CHECK: call{{.*}}cudaLaunch __global__ void kernelfunc(int i, int j, int k) {} + +// Test that we've built correct kernel launch sequence. +// CHECK: define{{.*}}hostfunc +// CHECK: call{{.*}}cudaConfigureCall +// CHECK: call{{.*}}kernelfunc +void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } + +// Test that we've built a function to register kernels +// CHECK: define internal void @__cuda_register_kernels +// CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc + +// Test that we've built contructor.. +// CHECK: define internal void @__cuda_module_ctor +// .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper) +// CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper +// .. stores return value in __cuda_gpubin_handle +// CHECK-NEXT: store{{.*}}__cuda_gpubin_handle +// .. and then calls __cuda_register_kernels +// CHECK-NEXT: call void @__cuda_register_kernels + +// Test that we've created destructor. +// CHECK: define internal void @__cuda_module_dtor +// CHECK: load{{.*}}__cuda_gpubin_handle +// CHECK-NEXT: call void @__cudaUnregisterFatBinary + diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu index faaeb69..e69473f 100644 --- a/test/CodeGenCUDA/filter-decl.cu +++ b/test/CodeGenCUDA/filter-decl.cu @@ -3,6 +3,12 @@ #include "Inputs/cuda.h" +// This has to be at the top of the file as that's where file-scope +// asm ends up. +// CHECK-HOST: module asm "file scope asm is host only" +// CHECK-DEVICE-NOT: module asm "file scope asm is host only" +__asm__("file scope asm is host only"); + // CHECK-HOST-NOT: constantdata = global // CHECK-DEVICE: constantdata = global __constant__ char constantdata[256]; diff --git a/test/CodeGenCUDA/host-device-calls-host.cu b/test/CodeGenCUDA/host-device-calls-host.cu new file mode 100644 index 0000000..8140f61 --- /dev/null +++ b/test/CodeGenCUDA/host-device-calls-host.cu @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-allow-host-calls-from-host-device -fcuda-is-device -Wno-cuda-compat -emit-llvm -o - | FileCheck %s + +#include "Inputs/cuda.h" + +extern "C" +void host_function() {} + +// CHECK-LABEL: define void @hd_function_a +extern "C" +__host__ __device__ void hd_function_a() { + // CHECK: call void @host_function + host_function(); +} + +// CHECK: declare void @host_function + +// CHECK-LABEL: define void @hd_function_b +extern "C" +__host__ __device__ void hd_function_b(bool b) { if (b) host_function(); } + +// CHECK-LABEL: define void @device_function_b +extern "C" +__device__ void device_function_b() { hd_function_b(false); } + +// CHECK-LABEL: define void @global_function +extern "C" +__global__ void global_function() { + // CHECK: call void @device_function_b + device_function_b(); +} + +// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} diff --git a/test/CodeGenCUDA/launch-bounds.cu b/test/CodeGenCUDA/launch-bounds.cu index 6f4102e..ecbd0ad 100644 --- a/test/CodeGenCUDA/launch-bounds.cu +++ b/test/CodeGenCUDA/launch-bounds.cu @@ -28,3 +28,54 @@ Kernel2() } // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256} + +template <int max_threads_per_block> +__global__ void +__launch_bounds__(max_threads_per_block) +Kernel3() +{ +} + +template void Kernel3<MAX_THREADS_PER_BLOCK>(); +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} + +template <int max_threads_per_block, int min_blocks_per_mp> +__global__ void +__launch_bounds__(max_threads_per_block, min_blocks_per_mp) +Kernel4() +{ +} +template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} + +const int constint = 100; +template <int max_threads_per_block, int min_blocks_per_mp> +__global__ void +__launch_bounds__(max_threads_per_block + constint, + min_blocks_per_mp + max_threads_per_block) +Kernel5() +{ +} +template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} + +// Make sure we don't emit negative launch bounds values. +__global__ void +__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP ) +Kernel6() +{ +} +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx", +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm", + +__global__ void +__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP ) +Kernel7() +{ +} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx", +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm", |