author:    psychocrypt <psychocrypt@users.noreply.github.com>  2017-11-03 21:46:27 +0100
committer: psychocrypt <psychocrypt@users.noreply.github.com>  2017-11-03 21:46:27 +0100
commit:    5acec3ea8e03469ae9d8cfb603ac45b3b5de6ffd (patch)
tree:      69643e17067d0147f83ec0ed4dbe6f9b4baf6835 /xmrstak
parent:    11f1028782ca62df10a08e6b5907f2f252bc3fc7 (diff)
download:  xmr-stak-5acec3ea8e03469ae9d8cfb603ac45b3b5de6ffd.zip
           xmr-stak-5acec3ea8e03469ae9d8cfb603ac45b3b5de6ffd.tar.gz
optimize NVIDIA autosuggestion

- avoid creating a config with zero threads or blocks
- WINDOWS: reduce the memory available to the auto-suggestion by the amount of memory already in use
Diffstat (limited to 'xmrstak')
-rw-r--r--  xmrstak/backend/nvidia/autoAdjust.hpp           | 19
-rw-r--r--  xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu  | 20
2 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index 87787fa..bf04518 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -88,17 +88,18 @@ private:
constexpr size_t byte2mib = 1024u * 1024u;
std::string conf;
- int i = 0;
for(auto& ctx : nvidCtxVec)
{
- conf += std::string(" // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n";
- conf += std::string(" // memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/" + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n";
- conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
- " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
- " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
- " \"affine_to_cpu\" : false,\n" +
- " },\n";
- ++i;
+ if(ctx.device_threads * ctx.device_blocks > 0)
+ {
+ conf += std::string(" // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n";
+ conf += std::string(" // memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/" + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n";
+ conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
+ " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
+ " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
+ " \"affine_to_cpu\" : false,\n" +
+ " },\n";
+ }
}
configTpl.replace("GPUCONFIG",conf);
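
The hunk above only emits a config entry when the auto-adjusted launch shape is usable (the now-unused counter i is dropped as well). Below is a minimal, self-contained sketch of that guard; DeviceCfg and buildGpuConfig are hypothetical names for illustration, not the miner's API.

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for the miner's per-device context; only the
// fields the guard needs are modeled here.
struct DeviceCfg
{
	int device_id;
	int device_threads;
	int device_blocks;
};

std::string buildGpuConfig(const std::vector<DeviceCfg>& devices)
{
	std::string conf;
	for(const auto& d : devices)
	{
		// Skip devices where the auto-suggestion produced no usable
		// launch shape; with non-negative values, threads * blocks > 0
		// holds exactly when both factors are positive.
		if(d.device_threads * d.device_blocks <= 0)
			continue;
		conf += "  { \"index\" : " + std::to_string(d.device_id) +
			", \"threads\" : " + std::to_string(d.device_threads) +
			", \"blocks\" : " + std::to_string(d.device_blocks) + " },\n";
	}
	return conf;
}

int main()
{
	// Device 1 got zero threads/blocks from the auto-adjuster, so only
	// device 0 appears in the generated config.
	std::vector<DeviceCfg> devices{ {0, 32, 20}, {1, 0, 0} };
	std::printf("%s", buildGpuConfig(devices).c_str());
}

In the commit the same test appears as ctx.device_threads * ctx.device_blocks > 0; since neither field is set negative by the auto-adjustment, the single product test is equivalent to requiring both to be positive.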
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index b161258..e18532f 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -364,6 +364,26 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
hashMemSize = AEON_MEMORY;
}
+#ifdef WIN32
+ /* On Windows we use bfactor (split the slow kernel into smaller parts) to
+ * prevent Windows from killing long-running kernels.
+ * If there is already memory in use on the GPU, we assume that other
+ * applications are running between the split kernel launches; this can
+ * trigger TLB flushes, strongly reduce performance, and ultimately cause
+ * Windows to kill the miner.
+ * By reducing maxMemUsage we try to avoid this effect.
+ */
+ size_t usedMem = totalMemory - freeMemory;
+ if(usedMem >= maxMemUsage)
+ {
+ printf("WARNING: NVIDIA GPU %d: already %s MiB memory in use, skip GPU.\n",
+ ctx->device_id,
+ std::to_string(usedMem/byteToMiB).c_str());
+ }
+ else
+ maxMemUsage -= usedMem;
+
+#endif
// keep 128MiB memory free (value is randomly chosen)
// 200 byte are meta data memory (result nonce, ...)
size_t availableMem = freeMemory - (128u * byteToMiB) - 200u;
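
The Windows-only block added above shrinks the memory budget used by the auto-suggestion by whatever other applications already occupy, and skips the GPU outright when nothing is left over. A minimal sketch of that accounting follows; adjustBudget and its return-value convention are hypothetical (the commit instead reduces the local maxMemUsage in place and prints the warning before skipping).

#include <cstddef>
#include <cstdio>

constexpr std::size_t byteToMiB = 1024u * 1024u;

// Hypothetical helper: returns the remaining memory budget in bytes,
// or 0 when other applications already consume the whole budget and
// the device should be skipped.
std::size_t adjustBudget(int device_id, std::size_t totalMemory,
                         std::size_t freeMemory, std::size_t maxMemUsage)
{
	const std::size_t usedMem = totalMemory - freeMemory;
	if(usedMem >= maxMemUsage)
	{
		std::printf("WARNING: NVIDIA GPU %d: already %zu MiB of memory in use, skipping GPU.\n",
			device_id, usedMem / byteToMiB);
		return 0;
	}
	// Shrink the budget by what other processes already occupy, so the
	// suggested thread/block counts leave their working sets resident.
	return maxMemUsage - usedMem;
}

int main()
{
	// Example: an 8 GiB card with 6 GiB free and a 4 GiB budget keeps
	// 4 GiB - 2 GiB = 2 GiB for the miner.
	const std::size_t GiB = 1024u * byteToMiB;
	std::printf("budget: %zu MiB\n",
		adjustBudget(0, 8 * GiB, 6 * GiB, 4 * GiB) / byteToMiB);
}

Per the comment in the diff, the motivation is that a busy GPU interleaves other work between the bfactor-split kernel launches, so leaving the other applications' working sets resident avoids TLB flushes and keeps Windows from killing the miner.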