Diffstat (limited to 'xmrstak/backend/nvidia/nvcc_code')
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 90 |
1 file changed, 84 insertions, 6 deletions
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index e18532f..9923cb2 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -1,6 +1,9 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
+#include <sstream>
+#include <algorithm>
+#include <vector>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <device_functions.hpp>
@@ -270,6 +273,15 @@ extern "C" int cuda_get_devicecount( int* deviceCount)
 	return 1;
 }
 
+/** get device information
+ *
+ * @return 0 = all OK,
+ *         1 = something went wrong,
+ *         2 = gpu cannot be selected,
+ *         3 = context cannot be created,
+ *         4 = not enough memory,
+ *         5 = architecture not supported (not compiled for the gpu architecture)
+ */
 extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 {
 	cudaError_t err;
@@ -279,25 +291,25 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	if(err != cudaSuccess)
 	{
 		printf("Unable to query CUDA driver version! Is an nVidia driver installed?\n");
-		return 0;
+		return 1;
 	}
 
 	if(version < CUDART_VERSION)
 	{
 		printf("Driver does not support CUDA %d.%d API! Update your nVidia driver!\n", CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10);
-		return 0;
+		return 1;
 	}
 
 	int GPU_N;
 	if(cuda_get_devicecount(&GPU_N) == 0)
 	{
-		return 0;
+		return 1;
 	}
 
 	if(ctx->device_id >= GPU_N)
 	{
 		printf("Invalid device ID!\n");
-		return 0;
+		return 1;
 	}
 
 	cudaDeviceProp props;
@@ -305,7 +317,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	if(err != cudaSuccess)
 	{
 		printf("\nGPU %d: %s\n%s line %d\n", ctx->device_id, cudaGetErrorString(err), __FILE__, __LINE__);
-		return 0;
+		return 1;
 	}
 
 	ctx->device_name = strdup(props.name);
@@ -313,8 +325,52 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	ctx->device_arch[0] = props.major;
 	ctx->device_arch[1] = props.minor;
 
+	const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+	ctx->name = std::string(props.name);
+	std::vector<int> arch;
+#define XMRSTAK_PP_TOSTRING1(str) #str
+#define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)
+	char const * archStringList = XMRSTAK_PP_TOSTRING(XMRSTAK_CUDA_ARCH_LIST);
+#undef XMRSTAK_PP_TOSTRING
+#undef XMRSTAK_PP_TOSTRING1
+	std::stringstream ss(archStringList);
+
+	// transform the string list separated with `+` into a vector of integers
+	int tmpArch;
+	while ( ss >> tmpArch )
+		arch.push_back( tmpArch );
+
+	if(gpuArch >= 20 && gpuArch < 30)
+	{
+		// compiled binary must support sm_20 for fermi
+		std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
+		if(it == arch.end())
+		{
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			return 5;
+		}
+	}
+	if(gpuArch >= 30)
+	{
+		// search the minimum architecture greater than sm_20
+		int minSupportedArch = 0;
+		/* - for architectures newer than Fermi we need at least sm_30
+		 *   or an architecture >= gpuArch
+		 * - it is not possible to use a gpu with an architecture >= 30
+		 *   with a sm_20-only compiled binary
+		 */
+		for(size_t i = 0; i < arch.size(); ++i)
+			if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch))
+				minSupportedArch = arch[i];
+		if(minSupportedArch >= 30 && gpuArch < minSupportedArch)
+		{
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			return 5;
+		}
+	}
+
+	// set all device options marked as auto (-1) to a valid value
 	if(ctx->device_blocks == -1)
 	{
@@ -347,9 +403,30 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		maxMemUsage = size_t(1024u) * byteToMiB;
 	}
 
+	int* tmp;
+	// a device must be selected to get the right memory usage later on
+	err = cudaSetDevice(ctx->device_id);
+	if(err != cudaSuccess)
+	{
+		printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
+		return 2;
+	}
+	// trigger that a context on the gpu will be allocated
+	err = cudaMalloc(&tmp, 256);
+	if(err != cudaSuccess)
+	{
+		printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
+		return 3;
+	}
+
 	size_t freeMemory = 0;
 	size_t totalMemory = 0;
 
 	CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
+
+	cudaFree(tmp);
+	// delete the created context on the gpu
+	cudaDeviceReset();
+
 	ctx->total_device_memory = totalMemory;
 	ctx->free_device_memory = freeMemory;
@@ -379,6 +457,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		printf("WARNING: NVIDIA GPU %d: already %s MiB memory in use, skip GPU.\n",
 			ctx->device_id,
 			std::to_string(usedMem/byteToMiB).c_str());
+		return 4;
 	}
 	else
 		maxMemUsage -= usedMem;
@@ -404,5 +483,5 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	}
 
-	return 1;
+	return 0;
 }
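The architecture check added above relies on a two-level preprocessor stringize trick: the build system passes XMRSTAK_CUDA_ARCH_LIST as a bare token list (e.g. 30+35+50), the inner macro expands it, and # turns the expansion into a string literal. Parsing then exploits the fact that operator>> accepts a leading '+' as a sign, so the '+'-separated list splits cleanly into integers. Below is a minimal standalone sketch of that mechanism; the fallback default is hypothetical and normally comes from a build flag such as -DXMRSTAK_CUDA_ARCH_LIST=30+35+50.

#include <algorithm>
#include <iostream>
#include <sstream>
#include <vector>

#ifndef XMRSTAK_CUDA_ARCH_LIST
#define XMRSTAK_CUDA_ARCH_LIST 30+35+50 // hypothetical default for this sketch
#endif

#define XMRSTAK_PP_TOSTRING1(str) #str
#define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)

int main()
{
    // the two-level expansion turns the bare token list into the literal "30+35+50"
    char const* archStringList = XMRSTAK_PP_TOSTRING(XMRSTAK_CUDA_ARCH_LIST);
    std::stringstream ss(archStringList);

    // operator>> consumes each '+' as the sign of the next number,
    // so the list splits into 30, 35, 50 without manual tokenizing
    std::vector<int> arch;
    int tmpArch;
    while(ss >> tmpArch)
        arch.push_back(tmpArch);

    // e.g. the Fermi check from the patch: is sm_20 among the compiled architectures?
    bool const hasFermi = std::find(arch.begin(), arch.end(), 20) != arch.end();
    std::cout << "parsed " << arch.size() << " architectures, sm_20 included: "
              << (hasFermi ? "yes" : "no") << "\n";
    return 0;
}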
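The memory probe in the patch works around the fact that cudaMemGetInfo() reports free/total memory for the device behind the current CUDA context, and contexts are created lazily: the device is selected, a tiny throwaway cudaMalloc() forces context creation, and cudaDeviceReset() tears it down again afterwards. A hedged sketch of the same probe as a standalone helper follows; query_free_memory is a hypothetical name, not part of the patch.

#include <cstdio>
#include <cuda_runtime.h>

// hypothetical standalone helper, mirroring the probe in the patch;
// return codes follow the convention documented above
int query_free_memory(int device_id, size_t* freeMem, size_t* totalMem)
{
    if(cudaSetDevice(device_id) != cudaSuccess)
        return 2; // gpu cannot be selected

    // a small allocation forces lazy creation of the context on the device
    int* tmp;
    if(cudaMalloc(&tmp, 256) != cudaSuccess)
        return 3; // context cannot be created

    cudaError_t err = cudaMemGetInfo(freeMem, totalMem);

    cudaFree(tmp);
    cudaDeviceReset(); // tear the probe context down again
    return err == cudaSuccess ? 0 : 1;
}

int main()
{
    size_t freeMem = 0, totalMem = 0;
    if(query_free_memory(0, &freeMem, &totalMem) == 0)
        printf("GPU 0: %zu MiB free of %zu MiB\n",
               freeMem / (1024u * 1024u), totalMem / (1024u * 1024u));
    return 0;
}

Resetting the device afterwards matters here because cuda_get_deviceinfo() only inspects the GPU; the long-lived mining context is created later, and leaving the probe context alive would hold memory on every enumerated device.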
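The return-value change at the end of the diff flips the function to the usual C convention: 0 now means success, and each non-zero code identifies a distinct failure cause from the new docstring, so callers can, for example, skip an unsupported GPU instead of aborting. A hypothetical caller-side helper (not part of the patch) mapping the documented codes to messages:

#include <cstdio>

// hypothetical helper: maps the documented return codes of
// cuda_get_deviceinfo() to human-readable strings
char const* deviceinfo_error_string(int ret)
{
    switch(ret)
    {
    case 0: return "all OK";
    case 1: return "something went wrong";
    case 2: return "gpu cannot be selected";
    case 3: return "context cannot be created";
    case 4: return "not enough memory";
    case 5: return "architecture not supported (not compiled for the gpu architecture)";
    default: return "unknown return code";
    }
}

int main()
{
    for(int ret = 0; ret <= 5; ++ret)
        printf("%d -> %s\n", ret, deviceinfo_error_string(ret));
    return 0;
}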