summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt15
-rw-r--r--doc/compile.md4
-rw-r--r--xmrstak/backend/amd/amd_gpu/gpu.cpp154
3 files changed, 98 insertions, 75 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09c7cec..33e97ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,15 +39,18 @@ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${BUILD_TYPE}")
set(XMR-STAK_CURRENCY "all" CACHE STRING "select miner currency")
set_property(CACHE XMR-STAK_CURRENCY PROPERTY STRINGS "all;monero;aeon")
-if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture")
- set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic")
- if("${XMR-STAK_COMPILE}" STREQUAL "native")
+
+set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture")
+set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic")
+if("${XMR-STAK_COMPILE}" STREQUAL "native")
+ if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_CXX_FLAGS "-march=native -mtune=native ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-march=native -mtune=native ${CMAKE_C_FLAGS}")
- elseif(NOT "${XMR-STAK_COMPILE}" STREQUAL "generic")
- message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'")
endif()
+elseif("${XMR-STAK_COMPILE}" STREQUAL "generic")
+ add_definitions("-DCONF_ENFORCE_OpenCL_1_2=1")
+else()
+ message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'")
endif()
if("${XMR-STAK_CURRENCY}" STREQUAL "all")
diff --git a/doc/compile.md b/doc/compile.md
index 30cf58c..cf25135 100644
--- a/doc/compile.md
+++ b/doc/compile.md
@@ -37,7 +37,7 @@ After the configuration you need to compile the miner, follow the guide for your
- you can find the binary and the `config.txt` file after `make install` in `$HOME/xmr-stak-cpu/bin`
- `CMAKE_LINK_STATIC` link libgcc and libstdc++ libraries static (default OFF)
- enable with `cmake .. -DCMAKE_LINK_STATIC=ON`
- - if you use static compile to run the miner on another system set `-DXMR-STAK_COMPILE=generic` (only available for LINUX, BSD and MacOSX)
+ - if you compile statically to run the miner on another system, set `-DXMR-STAK_COMPILE=generic`
- `CMAKE_BUILD_TYPE` set the build type
- valid options: `Release` or `Debug`
- you should always keep `Release` for your productive miners
@@ -46,8 +46,6 @@ After the configuration you need to compile the miner, follow the guide for your
- `OpenSSL_ENABLE` allow to disable/enable the dependency *OpenSSL*
- it is not possible to connect to a *https* secured pool if option is disabled: `cmake .. -DOpenSSL_ENABLE=OFF`
- `XMR-STAK_CURRENCY` - compile for Monero(XMR) or Aeon(AEON) usage only e.g. `cmake .. -DXMR-STAK_CURRENCY=monero`
-
-### only available for LINUX, BSD and MacOSX
- `XMR-STAK_COMPILE` select the CPU compute architecture (default: native)
- native means the miner binary can be used only on the system where it is compiled but will achieve the highest hash rate
- use `cmake .. -DXMR-STAK_COMPILE=generic` to run the miner on all CPUs with SSE2
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 791ceb2..15b8457 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -172,7 +172,7 @@ const char* err_to_str(cl_int ret)
return "CL_INVALID_LINKER_OPTIONS";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
-#ifdef CL_VERSION_2_0
+#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2)
case CL_INVALID_PIPE_SIZE:
return "CL_INVALID_PIPE_SIZE";
case CL_INVALID_DEVICE_QUEUE:
@@ -226,8 +226,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
return ERR_OCL_API;
}
+ /* Some kernels spawn 8 times more threads than the user configures.
+ * To give the user the correct maximum work size, we divide the hardware-specific maximum by 8.
+ */
+ MaximumWorkSize /= 8;
printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
-#ifdef CL_VERSION_2_0
+#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2)
const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 };
ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
#else
@@ -432,8 +436,11 @@ uint32_t getNumPlatforms()
// Get platform and device information
clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
- platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
- clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
+ if(clStatus != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for number of platforms.", err_to_str(clStatus));
+ return 0u;
+ }
return num_platforms;
}
@@ -448,39 +455,60 @@ std::vector<GpuContext> getAMDDevices(int index)
uint32_t numPlatforms = getNumPlatforms();
-
- platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
- clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
-
- clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
- device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices);
- clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
- for (int k = 0; k < num_devices; k++) {
- cl_int clError;
- std::vector<char> devVendorVec(1024);
- clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL);
- std::string devVendor(devVendorVec.data());
- if( devVendor.find("Advanced Micro Devices") != std::string::npos)
+ if(numPlatforms)
+ {
+ platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
+ clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
+ if(clStatus == CL_SUCCESS)
{
- GpuContext ctx;
- ctx.deviceIdx = k;
- clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL);
- size_t maxMem;
- clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL);
- clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL);
- // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory
- ctx.freeMem = std::min(ctx.freeMem, maxMem);
- std::vector<char> devNameVec(1024);
- clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL);
- ctx.name = std::string(devNameVec.data());
- printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
- ctx.DeviceID = device_list[k];
- ctxVec.push_back(ctx);
+ clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+ if(clStatus == CL_SUCCESS)
+ {
+ device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices);
+ clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
+ if(clStatus == CL_SUCCESS)
+ {
+ for (int k = 0; k < num_devices; k++)
+ {
+ cl_int clError;
+ std::vector<char> devVendorVec(1024);
+ clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL);
+ if(clStatus == CL_SUCCESS)
+ {
+ std::string devVendor(devVendorVec.data());
+ if( devVendor.find("Advanced Micro Devices") != std::string::npos)
+ {
+ GpuContext ctx;
+ ctx.deviceIdx = k;
+ clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL);
+ size_t maxMem;
+ clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL);
+ clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL);
+ // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory
+ ctx.freeMem = std::min(ctx.freeMem, maxMem);
+ std::vector<char> devNameVec(1024);
+ clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL);
+ ctx.name = std::string(devNameVec.data());
+ printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
+ ctx.DeviceID = device_list[k];
+ ctxVec.push_back(ctx);
+ }
+ }
+ else
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name.", err_to_str(clStatus));
+ }
+ }
+ else
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus));
+ free(device_list);
+ }
+ else
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus));
}
+ else
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
}
-
-
- free(device_list);
+
free(platforms);
return ctxVec;
@@ -504,20 +532,25 @@ int getAMDPlatformIdx()
int platformIndex = -1;
- for (int i = 0; i < numPlatforms; i++) {
- size_t infoSize;
- clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
- std::vector<char> platformNameVec(infoSize);
+ if(clStatus == CL_SUCCESS)
+ {
+ for (int i = 0; i < numPlatforms; i++) {
+ size_t infoSize;
+ clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
+ std::vector<char> platformNameVec(infoSize);
- clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
- std::string platformName(platformNameVec.data());
- if( platformName.find("Advanced Micro Devices") != std::string::npos)
- {
- platformIndex = i;
- printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
- break;
+ clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
+ std::string platformName(platformNameVec.data());
+ if( platformName.find("Advanced Micro Devices") != std::string::npos)
+ {
+ platformIndex = i;
+ printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
+ break;
+ }
}
}
+ else
+ printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
free(platforms);
return platformIndex;
@@ -547,27 +580,6 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
return ERR_STUPID_PARAMS;
}
-
-
- cl_platform_id * platforms = NULL;
- cl_int clStatus;
- uint32_t numPlatforms = getNumPlatforms();
-
- platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
- clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
-
- size_t infoSize;
- clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
- std::vector<char> platformNameVec(infoSize);
- clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
- std::string platformName(platformNameVec.data());
- if( platformName.find("Advanced Micro Devices") == std::string::npos)
- {
- printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
- }
-
- free(platforms);
-
/*MSVC skimping on devel costs by shoehorning C99 to be a subset of C++? Noooo... can't be.*/
#ifdef __GNUC__
cl_platform_id PlatformIDList[entries];
@@ -580,6 +592,16 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
return ERR_OCL_API;
}
+ size_t infoSize;
+ clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
+ std::vector<char> platformNameVec(infoSize);
+ clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
+ std::string platformName(platformNameVec.data());
+ if( platformName.find("Advanced Micro Devices") == std::string::npos)
+ {
+ printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
+ }
+
if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret));
OpenPOWER on IntegriCloud