From ee5d2c522f5dc9e5564433c05da2f3cc2a9e0ff4 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 14 Apr 2018 23:17:33 +0200 Subject: allow non AMD OpenCL driver and devices - add CLI flag to explicitly use non AMD OpenCL and devices - adjust OpenCL output (use OpenCL instead of AMD if --altOpenCL is sued) - optimize NVIDIA OpenCL auto suggestion --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 30 ++++++++++++++++++++++-------- xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 + xmrstak/backend/amd/autoAdjust.hpp | 23 ++++++++++++++++++++--- xmrstak/backend/amd/minethd.cpp | 6 ++++-- xmrstak/backend/backendConnector.cpp | 5 +++-- xmrstak/cli/cli-miner.cpp | 20 ++++++++++++++++++++ xmrstak/params.hpp | 3 +++ 7 files changed, 73 insertions(+), 15 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 79e80bd..9a4ba73 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -675,11 +675,18 @@ std::vector getAMDDevices(int index) } std::string devVendor(devVendorVec.data()); - if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) + + bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos; + bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos; + + std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor; + if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA")) { GpuContext ctx; std::vector devNameVec(1024); size_t maxMem; + if( devVendor.find("NVIDIA Corporation") != std::string::npos) + ctx.isNVIDIA = true; if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS) { @@ -699,6 +706,10 @@ std::vector getAMDDevices(int index) continue; } + // the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation + if(ctx.isNVIDIA) + maxMem = ctx.freeMem; + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k); @@ -747,13 +758,15 @@ int getAMDPlatformIdx() clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos || + + bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos || platformName.find("Apple") != std::string::npos || - platformName.find("Mesa") != std::string::npos - ) + platformName.find("Mesa") != std::string::npos; + bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos; + std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor; + if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA")) { - - printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); + printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str()); if(platformName.find("Mesa") != std::string::npos) mesaPlatform = i; else @@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) std::vector platformNameVec(infoSize); clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") == std::string::npos) + if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos) { printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); } @@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) { size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize; ctx[i].rawIntensity = reduced_intensity; - printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity)); + const std::string backendName = xmrstak::params::inst().openCLVendor; + printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity)); } if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index 0db6c90..5ab80b8 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -27,6 +27,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + bool isNVIDIA = false; int compMode; /*Output vars*/ diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 6df0eea..e7e98d4 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -91,6 +91,7 @@ private: std::string conf; for(auto& ctx : devVec) { + size_t minFreeMem = 128u * byteToMiB; /* 1000 is a magic selected limit, the reason is that more than 2GiB memory * sowing down the memory performance because of TLB cache misses */ @@ -112,12 +113,26 @@ private: */ maxThreads = 2024u; } + + // NVIDIA optimizations + if( + ctx.isNVIDIA && ( + ctx.name.find("P100") != std::string::npos || + ctx.name.find("V100") != std::string::npos + ) + ) + { + // do not limit the number of threads + maxThreads = 40000u; + minFreeMem = 512u * byteToMiB; + } + // increase all intensity limits by two for aeon if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) maxThreads *= 2u; // keep 128MiB memory free (value is randomly chosen) - size_t availableMem = ctx.freeMem - (128u * byteToMiB); + size_t availableMem = ctx.freeMem - minFreeMem; // 224byte extra memory is used per thread for meta data size_t perThread = hashMemSize + 224u; size_t maxIntensity = availableMem / perThread; @@ -138,7 +153,7 @@ private: // set 8 threads per block (this is a good value for the most gpus) conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n" + " \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n" " \"comp_mode\" : true\n" + " },\n"; } @@ -151,7 +166,9 @@ private: configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex)); configTpl.replace("GPUCONFIG",conf); configTpl.write(params::inst().configFileAMD); - printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str()); + + const std::string backendName = xmrstak::params::inst().openCLVendor; + printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str()); } std::vector devVec; diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index ac21607..b1cc1f6 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -137,6 +137,8 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor for (i = 0; i < n; i++) { jconf::inst()->GetThreadConfig(i, cfg); + + const std::string backendName = xmrstak::params::inst().openCLVendor; if(cfg.cpu_aff >= 0) { @@ -144,10 +146,10 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory."); #endif - printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff); + printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff); } else - printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i); + printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i); minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg); pvThreads->push_back(thd); diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp index acedbd6..6f80a0f 100644 --- a/xmrstak/backend/backendConnector.cpp +++ b/xmrstak/backend/backendConnector.cpp @@ -77,11 +77,12 @@ std::vector* BackendConnector::thread_starter(miner_work& pWork) #ifndef CONF_NO_OPENCL if(params::inst().useAMD) { - plugin amdplugin("AMD", "xmrstak_opencl_backend"); + const std::string backendName = xmrstak::params::inst().openCLVendor; + plugin amdplugin(backendName, "xmrstak_opencl_backend"); std::vector* amdThreads = amdplugin.startBackend(static_cast(pvThreads->size()), pWork, environment::inst()); pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads)); if(amdThreads->size() == 0) - printer::inst()->print_msg(L0, "WARNING: backend AMD disabled."); + printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str()); } #endif diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 0c52e10..2407c29 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -80,6 +80,8 @@ void help() #ifndef CONF_NO_OPENCL cout<<" --noAMD disable the AMD miner backend"<=argc ) + { + printer::inst()->print_msg(L0, "No argument for parameter '--openCLVendor' given"); + win_exit(); + return 1; + } + std::string vendor(argv[i]); + params::inst().openCLVendor = vendor; + if(vendor != "AMD" && vendor != "NVIDIA") + { + printer::inst()->print_msg(L0, "'--openCLVendor' must be 'AMD' or 'NVIDIA'"); + win_exit(); + return 1; + } + } else if(opName.compare("--noAMDCache") == 0) { params::inst().AMDCache = false; diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp index 0171f28..6e676cf 100644 --- a/xmrstak/params.hpp +++ b/xmrstak/params.hpp @@ -24,6 +24,8 @@ struct params bool AMDCache; bool useNVIDIA; bool useCPU; + // user selected OpenCL vendor + std::string openCLVendor; bool poolUseTls = false; std::string poolURL; @@ -60,6 +62,7 @@ struct params AMDCache(true), useNVIDIA(true), useCPU(true), + openCLVendor("AMD"), configFile("config.txt"), configFilePools("pools.txt"), configFileAMD("amd.txt"), -- cgit v1.1