CUDA: reduce startup time

- reduce startup time for multi-GPU systems by initializing the GPU memory sequentially instead of concurrently
author: psychocrypt <psychocryptHPC@gmail.com> 2018-02-28 23:33:23 +0100
committer: psychocrypt <psychocryptHPC@gmail.com> 2018-02-28 23:33:23 +0100
commit: 6488a0268706552b9320c9bbbc361222e9629eca (patch)
tree: 5344c985b398d3f178f474bb1174619104e27b19 /xmrstak
parent: 7b8506464ce9b099c0984d19978630a74aaf1e42 (diff)
download: xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.zip
xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.tar.gz
2 files changed, 35 insertions, 16 deletions
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 9fd08fb..867a998 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -80,14 +80,22 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
 	ctx.syncMode = cfg.syncMode;
 	this->affinity = cfg.cpu_aff;
 
-	std::unique_lock<std::mutex> lck(thd_aff_set);
-	std::future<void> order_guard = order_fix.get_future();
+	std::future<void> numa_guard = numa_promise.get_future();
+	thread_work_guard = thread_work_promise.get_future();
 
 	oWorkThd = std::thread(&minethd::work_main, this);
 
-	order_guard.wait();
+	/* Wait until the gpu memory is initialized and numa cpu memory is pinned.
+	 * The startup time is reduced if the memory is initialized in sequential order
+	 * without concurrent threads (CUDA driver is less occupied).
+	 */
+	numa_guard.wait();
+}
 
-	if(affinity >= 0) //-1 means no affinity
+void minethd::start_mining()
+{
+	thread_work_promise.set_value();
+	if(this->affinity >= 0) //-1 means no affinity
 		if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
 			printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
 }
@@ -179,6 +187,11 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 
 	}
 
+	for (i = 0; i < n; i++)
+	{
+		static_cast<minethd*>((*pvThreads)[i])->start_mining();
+	}
+
 	return pvThreads;
 }
 
@@ -208,10 +221,18 @@ void minethd::work_main()
 	if(affinity >= 0) //-1 means no affinity
 		bindMemoryToNUMANode(affinity);
 
-	order_fix.set_value();
-	std::unique_lock<std::mutex> lck(thd_aff_set);
-	lck.release();
+	if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
+	{
+		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
+		std::exit(0);
+	}
+
+	// numa memory bind and gpu memory is initialized
+	numa_promise.set_value();
+
 	std::this_thread::yield();
+	// wait until all NVIDIA devices are initialized
+	thread_work_guard.wait();
 
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
@@ -221,12 +242,6 @@ void minethd::work_main()
 
 	globalStates::inst().iConsumeCnt++;
 
-	if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
-	{
-		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
-		std::exit(0);
-	}
-
 	bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero");
 
 	while (bQuit == 0)
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index d13c868..fcd24fa 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -32,7 +32,8 @@ private:
 	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
 
 	minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
-
+	void start_mining();
+	
 	void work_main();
 	void consume_work();
 
@@ -44,8 +45,11 @@ private:
 	static miner_work oGlobalWork;
 	miner_work oWork;
 
-	std::promise<void> order_fix;
-	std::mutex thd_aff_set;
+	std::promise<void> numa_promise;
+	std::promise<void> thread_work_promise;
+
+	// block thread until all NVIDIA GPUs are initialized
+	std::future<void> thread_work_guard;
 
 	std::thread oWorkThd;
 	int64_t affinity;
author	psychocrypt <psychocryptHPC@gmail.com>	2018-02-28 23:33:23 +0100
committer	psychocrypt <psychocryptHPC@gmail.com>	2018-02-28 23:33:23 +0100
commit	6488a0268706552b9320c9bbbc361222e9629eca (patch)
tree	5344c985b398d3f178f474bb1174619104e27b19 /xmrstak
parent	7b8506464ce9b099c0984d19978630a74aaf1e42 (diff)
download	xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.zip xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.tar.gz