diff options
author | psychocrypt <psychocryptHPC@gmail.com> | 2018-02-28 23:33:23 +0100 |
---|---|---|
committer | psychocrypt <psychocryptHPC@gmail.com> | 2018-02-28 23:33:23 +0100 |
commit | 6488a0268706552b9320c9bbbc361222e9629eca (patch) | |
tree | 5344c985b398d3f178f474bb1174619104e27b19 /xmrstak | |
parent | 7b8506464ce9b099c0984d19978630a74aaf1e42 (diff) | |
download | xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.zip xmr-stak-6488a0268706552b9320c9bbbc361222e9629eca.tar.gz |
CUDA: reduce startup time
- reduce startup time for multi-GPU systems
- initialize the GPU memory sequentially (non-concurrently)
Diffstat (limited to 'xmrstak')
-rw-r--r-- | xmrstak/backend/nvidia/minethd.cpp | 41 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/minethd.hpp | 10 |
2 files changed, 35 insertions, 16 deletions
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 9fd08fb..867a998 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -80,14 +80,22 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) ctx.syncMode = cfg.syncMode; this->affinity = cfg.cpu_aff; - std::unique_lock<std::mutex> lck(thd_aff_set); - std::future<void> order_guard = order_fix.get_future(); + std::future<void> numa_guard = numa_promise.get_future(); + thread_work_guard = thread_work_promise.get_future(); oWorkThd = std::thread(&minethd::work_main, this); - order_guard.wait(); + /* Wait until the gpu memory is initialized and numa cpu memory is pinned. + * The startup time is reduced if the memory is initialized in sequential order + * without concurrent threads (CUDA driver is less occupied). + */ + numa_guard.wait(); +} - if(affinity >= 0) //-1 means no affinity +void minethd::start_mining() +{ + thread_work_promise.set_value(); + if(this->affinity >= 0) //-1 means no affinity if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity)) printer::inst()->print_msg(L1, "WARNING setting affinity failed."); } @@ -179,6 +187,11 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor } + for (i = 0; i < n; i++) + { + static_cast<minethd*>((*pvThreads)[i])->start_mining(); + } + return pvThreads; } @@ -208,10 +221,18 @@ void minethd::work_main() if(affinity >= 0) //-1 means no affinity bindMemoryToNUMANode(affinity); - order_fix.set_value(); - std::unique_lock<std::mutex> lck(thd_aff_set); - lck.release(); + if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1) + { + printer::inst()->print_msg(L0, "Setup failed for GPU %d. 
Exitting.\n", (int)iThreadNo); + std::exit(0); + } + + // numa memory bind and gpu memory is initialized + numa_promise.set_value(); + std::this_thread::yield(); + // wait until all NVIDIA devices are initialized + thread_work_guard.wait(); uint64_t iCount = 0; cryptonight_ctx* cpu_ctx; @@ -221,12 +242,6 @@ void minethd::work_main() globalStates::inst().iConsumeCnt++; - if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1) - { - printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo); - std::exit(0); - } - bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero"); while (bQuit == 0) diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp index d13c868..fcd24fa 100644 --- a/xmrstak/backend/nvidia/minethd.hpp +++ b/xmrstak/backend/nvidia/minethd.hpp @@ -32,7 +32,8 @@ private: typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg); - + void start_mining(); + void work_main(); void consume_work(); @@ -44,8 +45,11 @@ private: static miner_work oGlobalWork; miner_work oWork; - std::promise<void> order_fix; - std::mutex thd_aff_set; + std::promise<void> numa_promise; + std::promise<void> thread_work_promise; + + // block thread until all NVIDIA GPUs are initialized + std::future<void> thread_work_guard; std::thread oWorkThd; int64_t affinity; |