From ee5d2c522f5dc9e5564433c05da2f3cc2a9e0ff4 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Sat, 14 Apr 2018 23:17:33 +0200
Subject: allow non AMD OpenCL driver and devices

- add CLI flag to explicitly use non AMD OpenCL and devices
- adjust OpenCL output (use OpenCL instead of AMD if --altOpenCL is sued)
- optimize NVIDIA OpenCL auto suggestion
---
 xmrstak/backend/amd/amd_gpu/gpu.cpp  | 30 ++++++++++++++++++++++--------
 xmrstak/backend/amd/amd_gpu/gpu.hpp  |  1 +
 xmrstak/backend/amd/autoAdjust.hpp   | 23 ++++++++++++++++++++---
 xmrstak/backend/amd/minethd.cpp      |  6 ++++--
 xmrstak/backend/backendConnector.cpp |  5 +++--
 xmrstak/cli/cli-miner.cpp            | 20 ++++++++++++++++++++
 xmrstak/params.hpp                   |  3 +++
 7 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 79e80bd..9a4ba73 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -675,11 +675,18 @@ std::vector<GpuContext> getAMDDevices(int index)
 		}
 
 		std::string devVendor(devVendorVec.data());
-		if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos)
+
+		bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos;
+		bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos;
+
+		std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
+		if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
 		{
 			GpuContext ctx;
 			std::vector<char> devNameVec(1024);
 			size_t maxMem;
+			if( devVendor.find("NVIDIA Corporation") != std::string::npos)
+				ctx.isNVIDIA = true;
 
 			if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS)
 			{
@@ -699,6 +706,10 @@ std::vector<GpuContext> getAMDDevices(int index)
 				continue;
 			}
 
+			// the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation
+			if(ctx.isNVIDIA)
+				maxMem = ctx.freeMem;
+
 			if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
 			{
 				printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
@@ -747,13 +758,15 @@ int getAMDPlatformIdx()
 
 			clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
 			std::string platformName(platformNameVec.data());
-			if( platformName.find("Advanced Micro Devices") != std::string::npos ||
+
+			bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos ||
 				platformName.find("Apple") != std::string::npos ||
-				platformName.find("Mesa") != std::string::npos
-			)
+				platformName.find("Mesa") != std::string::npos;
+			bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos;
+			std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
+			if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
 			{
-
-				printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
+				printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str());
 				if(platformName.find("Mesa") != std::string::npos)
 					mesaPlatform = i;
 				else
@@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
 	std::vector<char> platformNameVec(infoSize);
 	clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
 	std::string platformName(platformNameVec.data());
-	if( platformName.find("Advanced Micro Devices") == std::string::npos)
+	if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos)
 	{
 		printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
 	}
@@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
 		{
 			size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize;
 			ctx[i].rawIntensity = reduced_intensity;
-			printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity));
+			const std::string backendName = xmrstak::params::inst().openCLVendor;
+			printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity));
 		}
 
 		if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 0db6c90..5ab80b8 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -27,6 +27,7 @@ struct GpuContext
 	size_t workSize;
 	int stridedIndex;
 	int memChunk;
+	bool isNVIDIA = false;
 	int compMode;
 
 	/*Output vars*/
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 6df0eea..e7e98d4 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -91,6 +91,7 @@ private:
 		std::string conf;
 		for(auto& ctx : devVec)
 		{
+			size_t minFreeMem = 128u * byteToMiB;
 			/* 1000 is a magic selected limit, the reason is that more than 2GiB memory
 			 * sowing down the memory performance because of TLB cache misses
 			 */
@@ -112,12 +113,26 @@ private:
 				 */
 				maxThreads = 2024u;
 			}
+
+			// NVIDIA optimizations
+			if(
+				ctx.isNVIDIA && (
+					ctx.name.find("P100") != std::string::npos ||
+				    ctx.name.find("V100") != std::string::npos
+				)
+			)
+			{
+				// do not limit the number of threads
+				maxThreads = 40000u;
+				minFreeMem = 512u * byteToMiB;
+			}
+
 			// increase all intensity limits by two for aeon
 			if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
 				maxThreads *= 2u;
 
 			// keep 128MiB memory free (value is randomly chosen)
-			size_t availableMem = ctx.freeMem - (128u * byteToMiB);
+			size_t availableMem = ctx.freeMem - minFreeMem;
 			// 224byte extra memory is used per thread for meta data
 			size_t perThread = hashMemSize + 224u;
 			size_t maxIntensity = availableMem / perThread;
@@ -138,7 +153,7 @@ private:
 				// set 8 threads per block (this is a good value for the most gpus)
 				conf += std::string("  { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
 					"    \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
-					"    \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n"
+					"    \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n"
 					"    \"comp_mode\" : true\n" +
 					"  },\n";
 			}
@@ -151,7 +166,9 @@ private:
 		configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
 		configTpl.replace("GPUCONFIG",conf);
 		configTpl.write(params::inst().configFileAMD);
-		printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str());
+
+		const std::string backendName = xmrstak::params::inst().openCLVendor;
+		printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str());
 	}
 
 	std::vector<GpuContext> devVec;
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index ac21607..b1cc1f6 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -137,6 +137,8 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 	for (i = 0; i < n; i++)
 	{
 		jconf::inst()->GetThreadConfig(i, cfg);
+
+		const std::string backendName = xmrstak::params::inst().openCLVendor;
 		
 		if(cfg.cpu_aff >= 0)
 		{
@@ -144,10 +146,10 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 			printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
 #endif
 
-			printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
+			printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff);
 		}
 		else
-			printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);
+			printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i);
 
 		minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
 		pvThreads->push_back(thd);
diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp
index acedbd6..6f80a0f 100644
--- a/xmrstak/backend/backendConnector.cpp
+++ b/xmrstak/backend/backendConnector.cpp
@@ -77,11 +77,12 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
 #ifndef CONF_NO_OPENCL
 	if(params::inst().useAMD)
 	{
-		plugin amdplugin("AMD", "xmrstak_opencl_backend");
+		const std::string backendName = xmrstak::params::inst().openCLVendor;
+		plugin amdplugin(backendName, "xmrstak_opencl_backend");
 		std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
 		pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
 		if(amdThreads->size() == 0)
-			printer::inst()->print_msg(L0, "WARNING: backend AMD disabled.");
+			printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
 	}
 #endif
 
diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp
index 0c52e10..2407c29 100644
--- a/xmrstak/cli/cli-miner.cpp
+++ b/xmrstak/cli/cli-miner.cpp
@@ -80,6 +80,8 @@ void help()
 #ifndef CONF_NO_OPENCL
 	cout<<"  --noAMD                    disable the AMD miner backend"<<endl;
 	cout<<"  --noAMDCache               disable the AMD(OpenCL) cache for precompiled binaries"<<endl;
+	cout<<"  --openCLVendor VENDOR      use OpenCL driver of VENDOR and devices [AMD,NVIDIA]"<<endl;
+	cout<<"                             default: AMD"<<endl;
 	cout<<"  --amd FILE                 AMD backend miner config file"<<endl;
 #endif
 #ifndef CONF_NO_CUDA
@@ -452,6 +454,24 @@ int main(int argc, char *argv[])
 		{
 			params::inst().useAMD = false;
 		}
+		else if(opName.compare("--openCLVendor") == 0)
+		{
+			++i;
+			if( i >=argc )
+			{
+				printer::inst()->print_msg(L0, "No argument for parameter '--openCLVendor' given");
+				win_exit();
+				return 1;
+			}
+			std::string vendor(argv[i]);
+			params::inst().openCLVendor = vendor;
+			if(vendor != "AMD" && vendor != "NVIDIA")
+			{
+				printer::inst()->print_msg(L0, "'--openCLVendor' must be 'AMD' or 'NVIDIA'");
+				win_exit();
+				return 1;
+			}
+		}
 		else if(opName.compare("--noAMDCache") == 0)
 		{
 			params::inst().AMDCache = false;
diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp
index 0171f28..6e676cf 100644
--- a/xmrstak/params.hpp
+++ b/xmrstak/params.hpp
@@ -24,6 +24,8 @@ struct params
 	bool AMDCache;
 	bool useNVIDIA;
 	bool useCPU;
+	// user selected OpenCL vendor
+	std::string openCLVendor;
 
 	bool poolUseTls = false;
 	std::string poolURL;
@@ -60,6 +62,7 @@ struct params
 		AMDCache(true),
 		useNVIDIA(true),
 		useCPU(true),
+		openCLVendor("AMD"),
 		configFile("config.txt"),
 		configFilePools("pools.txt"),
 		configFileAMD("amd.txt"),
-- 
cgit v1.1