summary | refs | log | tree | commit | diff | stats
path: root/xmrstak
diff options
context:
space:
mode:
author: fireice-uk <fireice-uk@users.noreply.github.com> 2018-03-04 19:28:09 +0000
committer: GitHub <noreply@github.com> 2018-03-04 19:28:09 +0000
commit: 98763bf01f36a6731dcd0abd39cf68fe85143555 (patch)
tree: 63d972b187a4adeefb66006117af32603f270aac /xmrstak
parent: 266b29146a5c01b2bfccbad5f6876a1e1d78ac52 (diff)
parent: 6488a0268706552b9320c9bbbc361222e9629eca (diff)
download: xmr-stak-98763bf01f36a6731dcd0abd39cf68fe85143555.zip
xmr-stak-98763bf01f36a6731dcd0abd39cf68fe85143555.tar.gz
Merge pull request #1121 from psychocrypt/topic-speedupCUDAStartup
CUDA: reduce startup time
Diffstat (limited to 'xmrstak')
-rw-r--r-- xmrstak/backend/nvidia/minethd.cpp | 41
-rw-r--r-- xmrstak/backend/nvidia/minethd.hpp | 10
2 files changed, 35 insertions, 16 deletions
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 9fd08fb..867a998 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -80,14 +80,22 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
ctx.syncMode = cfg.syncMode;
this->affinity = cfg.cpu_aff;
- std::unique_lock<std::mutex> lck(thd_aff_set);
- std::future<void> order_guard = order_fix.get_future();
+ std::future<void> numa_guard = numa_promise.get_future();
+ thread_work_guard = thread_work_promise.get_future();
oWorkThd = std::thread(&minethd::work_main, this);
- order_guard.wait();
+ /* Wait until the gpu memory is initialized and numa cpu memory is pinned.
+ * The startup time is reduced if the memory is initialized in sequential order
+ * without concurrent threads (CUDA driver is less occupied).
+ */
+ numa_guard.wait();
+}
- if(affinity >= 0) //-1 means no affinity
+void minethd::start_mining()
+{
+ thread_work_promise.set_value();
+ if(this->affinity >= 0) //-1 means no affinity
if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
}
@@ -179,6 +187,11 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
}
+ for (i = 0; i < n; i++)
+ {
+ static_cast<minethd*>((*pvThreads)[i])->start_mining();
+ }
+
return pvThreads;
}
@@ -208,10 +221,18 @@ void minethd::work_main()
if(affinity >= 0) //-1 means no affinity
bindMemoryToNUMANode(affinity);
- order_fix.set_value();
- std::unique_lock<std::mutex> lck(thd_aff_set);
- lck.release();
+ if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
+ {
+ printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
+ std::exit(0);
+ }
+
+ // numa memory bind and gpu memory is initialized
+ numa_promise.set_value();
+
std::this_thread::yield();
+ // wait until all NVIDIA devices are initialized
+ thread_work_guard.wait();
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
@@ -221,12 +242,6 @@ void minethd::work_main()
globalStates::inst().iConsumeCnt++;
- if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
- {
- printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
- std::exit(0);
- }
-
bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero");
while (bQuit == 0)
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index d13c868..fcd24fa 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -32,7 +32,8 @@ private:
typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
-
+ void start_mining();
+
void work_main();
void consume_work();
@@ -44,8 +45,11 @@ private:
static miner_work oGlobalWork;
miner_work oWork;
- std::promise<void> order_fix;
- std::mutex thd_aff_set;
+ std::promise<void> numa_promise;
+ std::promise<void> thread_work_promise;
+
+ // block thread until all NVIDIA GPUs are initialized
+ std::future<void> thread_work_guard;
std::thread oWorkThd;
int64_t affinity;
OpenPOWER on IntegriCloud