summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend
diff options
context:
space:
mode:
authorpsychocrypt <psychocryptHPC@gmail.com>2018-04-14 23:17:33 +0200
committerpsychocrypt <psychocryptHPC@gmail.com>2018-04-14 23:26:27 +0200
commit4682b28a5d304436ca20469e5089f97814f3f4ab (patch)
tree001fb739475e440047eb37b9dadb49b13ccb1a61 /xmrstak/backend
parent9158460bdc316fe9d6f1fb01095b5aff52c1acf8 (diff)
downloadxmr-stak-4682b28a5d304436ca20469e5089f97814f3f4ab.zip
xmr-stak-4682b28a5d304436ca20469e5089f97814f3f4ab.tar.gz
allow non AMD OpenCL driver and devices
- add CLI flag to explicitly use non-AMD OpenCL and devices - adjust OpenCL output (use OpenCL instead of AMD if --altOpenCL is used) - optimize NVIDIA OpenCL auto suggestion
Diffstat (limited to 'xmrstak/backend')
-rw-r--r--xmrstak/backend/amd/amd_gpu/gpu.cpp30
-rw-r--r--xmrstak/backend/amd/amd_gpu/gpu.hpp1
-rw-r--r--xmrstak/backend/amd/autoAdjust.hpp23
-rw-r--r--xmrstak/backend/amd/minethd.cpp6
-rw-r--r--xmrstak/backend/backendConnector.cpp5
5 files changed, 50 insertions, 15 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 79e80bd..9a4ba73 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -675,11 +675,18 @@ std::vector<GpuContext> getAMDDevices(int index)
}
std::string devVendor(devVendorVec.data());
- if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos)
+
+ bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos;
+ bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos;
+
+ std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
+ if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{
GpuContext ctx;
std::vector<char> devNameVec(1024);
size_t maxMem;
+ if( devVendor.find("NVIDIA Corporation") != std::string::npos)
+ ctx.isNVIDIA = true;
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS)
{
@@ -699,6 +706,10 @@ std::vector<GpuContext> getAMDDevices(int index)
continue;
}
+ // the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation
+ if(ctx.isNVIDIA)
+ maxMem = ctx.freeMem;
+
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
@@ -747,13 +758,15 @@ int getAMDPlatformIdx()
clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
- if( platformName.find("Advanced Micro Devices") != std::string::npos ||
+
+ bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos ||
platformName.find("Apple") != std::string::npos ||
- platformName.find("Mesa") != std::string::npos
- )
+ platformName.find("Mesa") != std::string::npos;
+ bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos;
+ std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
+ if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{
-
- printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
+ printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str());
if(platformName.find("Mesa") != std::string::npos)
mesaPlatform = i;
else
@@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
std::vector<char> platformNameVec(infoSize);
clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
- if( platformName.find("Advanced Micro Devices") == std::string::npos)
+ if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos)
{
printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
}
@@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize;
ctx[i].rawIntensity = reduced_intensity;
- printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity));
+ const std::string backendName = xmrstak::params::inst().openCLVendor;
+ printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity));
}
if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 0db6c90..5ab80b8 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -27,6 +27,7 @@ struct GpuContext
size_t workSize;
int stridedIndex;
int memChunk;
+ bool isNVIDIA = false;
int compMode;
/*Output vars*/
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 6df0eea..e7e98d4 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -91,6 +91,7 @@ private:
std::string conf;
for(auto& ctx : devVec)
{
+ size_t minFreeMem = 128u * byteToMiB;
/* 1000 is a magic selected limit, the reason is that more than 2GiB memory
* sowing down the memory performance because of TLB cache misses
*/
@@ -112,12 +113,26 @@ private:
*/
maxThreads = 2024u;
}
+
+ // NVIDIA optimizations
+ if(
+ ctx.isNVIDIA && (
+ ctx.name.find("P100") != std::string::npos ||
+ ctx.name.find("V100") != std::string::npos
+ )
+ )
+ {
+ // do not limit the number of threads
+ maxThreads = 40000u;
+ minFreeMem = 512u * byteToMiB;
+ }
+
// increase all intensity limits by two for aeon
if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
maxThreads *= 2u;
// keep 128MiB memory free (value is randomly chosen)
- size_t availableMem = ctx.freeMem - (128u * byteToMiB);
+ size_t availableMem = ctx.freeMem - minFreeMem;
// 224byte extra memory is used per thread for meta data
size_t perThread = hashMemSize + 224u;
size_t maxIntensity = availableMem / perThread;
@@ -138,7 +153,7 @@ private:
// set 8 threads per block (this is a good value for the most gpus)
conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
" \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
- " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n"
+ " \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n"
" \"comp_mode\" : true\n" +
" },\n";
}
@@ -151,7 +166,9 @@ private:
configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
configTpl.replace("GPUCONFIG",conf);
configTpl.write(params::inst().configFileAMD);
- printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str());
+
+ const std::string backendName = xmrstak::params::inst().openCLVendor;
+ printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str());
}
std::vector<GpuContext> devVec;
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index f15b480..9bc3676 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -137,6 +137,8 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
for (i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);
+
+ const std::string backendName = xmrstak::params::inst().openCLVendor;
if(cfg.cpu_aff >= 0)
{
@@ -144,10 +146,10 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
#endif
- printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
+ printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff);
}
else
- printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);
+ printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i);
minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
pvThreads->push_back(thd);
diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp
index acedbd6..6f80a0f 100644
--- a/xmrstak/backend/backendConnector.cpp
+++ b/xmrstak/backend/backendConnector.cpp
@@ -77,11 +77,12 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
#ifndef CONF_NO_OPENCL
if(params::inst().useAMD)
{
- plugin amdplugin("AMD", "xmrstak_opencl_backend");
+ const std::string backendName = xmrstak::params::inst().openCLVendor;
+ plugin amdplugin(backendName, "xmrstak_opencl_backend");
std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
if(amdThreads->size() == 0)
- printer::inst()->print_msg(L0, "WARNING: backend AMD disabled.");
+ printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
}
#endif
OpenPOWER on IntegriCloud