diff options
-rw-r--r-- | CMakeLists.txt | 15 | ||||
-rw-r--r-- | doc/compile.md | 4 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 154 |
3 files changed, 98 insertions, 75 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 09c7cec..33e97ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,15 +39,18 @@ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${BUILD_TYPE}") set(XMR-STAK_CURRENCY "all" CACHE STRING "select miner currency") set_property(CACHE XMR-STAK_CURRENCY PROPERTY STRINGS "all;monero;aeon") -if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture") - set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic") - if("${XMR-STAK_COMPILE}" STREQUAL "native") + +set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture") +set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic") +if("${XMR-STAK_COMPILE}" STREQUAL "native") + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC") set(CMAKE_CXX_FLAGS "-march=native -mtune=native ${CMAKE_CXX_FLAGS}") set(CMAKE_C_FLAGS "-march=native -mtune=native ${CMAKE_C_FLAGS}") - elseif(NOT "${XMR-STAK_COMPILE}" STREQUAL "generic") - message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'") endif() +elseif("${XMR-STAK_COMPILE}" STREQUAL "generic") + add_definitions("-DCONF_ENFORCE_OpenCL_1_2=1") +else() + message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'") endif() if("${XMR-STAK_CURRENCY}" STREQUAL "all") diff --git a/doc/compile.md b/doc/compile.md index 30cf58c..cf25135 100644 --- a/doc/compile.md +++ b/doc/compile.md @@ -37,7 +37,7 @@ After the configuration you need to compile the miner, follow the guide for your - you can find the binary and the `config.txt` file after `make install` in `$HOME/xmr-stak-cpu/bin` - `CMAKE_LINK_STATIC` link libgcc and libstdc++ libraries static (default OFF) - disable with `cmake .. 
-DCMAKE_LINK_STATIC=ON` - - if you use static compile to run the miner on another system set `-DXMR-STAK_COMPILE=generic` (only available for LINUX, BSD and MacOSX) + - if you use static compile to run the miner on another system set `-DXMR-STAK_COMPILE=generic` - `CMAKE_BUILD_TYPE` set the build type - valid options: `Release` or `Debug` - you should always keep `Release` for your productive miners @@ -46,8 +46,6 @@ After the configuration you need to compile the miner, follow the guide for your - `OpenSSL_ENABLE` allow to disable/enable the dependency *OpenSSL* - it is not possible to connect to a *https* secured pool if option is disabled: `cmake .. -DOpenSSL_ENABLE=OFF` - `XMR-STAK_CURRENCY` - compile for Monero(XMR) or Aeon(AEON) usage only e.g. `cmake .. -DXMR-STAK_CURRENCY=monero` - -### only available for LINUX, BSD and MacOSX - `XMR-STAK_COMPILE` select the CPU compute architecture (default: native) - native means the miner binary can be used only on the system where it is compiled but will achieve the highest hash rate - use `cmake .. -DXMR-STAK_COMPILE=generic` to run the miner on all CPUs with sse2 diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 791ceb2..15b8457 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -172,7 +172,7 @@ const char* err_to_str(cl_int ret) return "CL_INVALID_LINKER_OPTIONS"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; -#ifdef CL_VERSION_2_0 +#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2) case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE"; case CL_INVALID_DEVICE_QUEUE: @@ -226,8 +226,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } + /* Some kernels spawn 8 times more threads than the user is configuring. + * To give the user the correct maximum work size we divide the hardware specific max by 8. 
+ */ + MaximumWorkSize /= 8; printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize); -#ifdef CL_VERSION_2_0 +#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2) const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 }; ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret); #else @@ -432,8 +436,11 @@ uint32_t getNumPlatforms() // Get platform and device information clStatus = clGetPlatformIDs(0, NULL, &num_platforms); - platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms); - clStatus = clGetPlatformIDs(num_platforms, platforms, NULL); + if(clStatus != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for number of platforms.", err_to_str(clStatus)); + return 0u; + } return num_platforms; } @@ -448,39 +455,60 @@ std::vector<GpuContext> getAMDDevices(int index) uint32_t numPlatforms = getNumPlatforms(); - - platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); - clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); - - clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); - device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); - clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); - for (int k = 0; k < num_devices; k++) { - cl_int clError; - std::vector<char> devVendorVec(1024); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL); - std::string devVendor(devVendorVec.data()); - if( devVendor.find("Advanced Micro Devices") != std::string::npos) + if(numPlatforms) + { + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(clStatus == CL_SUCCESS) { - GpuContext ctx; - ctx.deviceIdx = k; - clError 
= clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL); - size_t maxMem; - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL); - // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory - ctx.freeMem = std::min(ctx.freeMem, maxMem); - std::vector<char> devNameVec(1024); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL); - ctx.name = std::string(devNameVec.data()); - printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); - ctx.DeviceID = device_list[k]; - ctxVec.push_back(ctx); + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if(clStatus == CL_SUCCESS) + { + device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); + if(clStatus == CL_SUCCESS) + { + for (int k = 0; k < num_devices; k++) + { + cl_int clError; + std::vector<char> devVendorVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL); + if(clStatus == CL_SUCCESS) + { + std::string devVendor(devVendorVec.data()); + if( devVendor.find("Advanced Micro Devices") != std::string::npos) + { + GpuContext ctx; + ctx.deviceIdx = k; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL); + size_t maxMem; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL); + // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory + 
ctx.freeMem = std::min(ctx.freeMem, maxMem); + std::vector<char> devNameVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL); + ctx.name = std::string(devNameVec.data()); + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); + ctx.DeviceID = device_list[k]; + ctxVec.push_back(ctx); + } + } + else + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name.", err_to_str(clStatus)); + } + } + else + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus)); + free(device_list); + } + else + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus)); } + else + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); } - - - free(device_list); + free(platforms); return ctxVec; @@ -504,20 +532,25 @@ int getAMDPlatformIdx() int platformIndex = -1; - for (int i = 0; i < numPlatforms; i++) { - size_t infoSize; - clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); - std::vector<char> platformNameVec(infoSize); + if(clStatus == CL_SUCCESS) + { + for (int i = 0; i < numPlatforms; i++) { + size_t infoSize; + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); - clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); - std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos) - { - platformIndex = i; - printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); - break; + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced 
Micro Devices") != std::string::npos) + { + platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); + break; + } } } + else + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); free(platforms); return platformIndex; @@ -547,27 +580,6 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_STUPID_PARAMS; } - - - cl_platform_id * platforms = NULL; - cl_int clStatus; - uint32_t numPlatforms = getNumPlatforms(); - - platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); - clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); - - size_t infoSize; - clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); - std::vector<char> platformNameVec(infoSize); - clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); - std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") == std::string::npos) - { - printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); - } - - free(platforms); - /*MSVC skimping on devel costs by shoehorning C99 to be a subset of C++? Noooo... 
can't be.*/ #ifdef __GNUC__ cl_platform_id PlatformIDList[entries]; @@ -580,6 +592,16 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_OCL_API; } + size_t infoSize; + clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); + clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced Micro Devices") == std::string::npos) + { + printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); + } + if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret)); |