diff options
author | fireice-uk <fireice-uk@users.noreply.github.com> | 2017-10-13 16:35:10 +0100 |
---|---|---|
committer | fireice-uk <fireice-uk@users.noreply.github.com> | 2017-10-22 13:12:52 +0100 |
commit | f13f70c28eb9e4b38d3b4932f5845f1d5cc01906 (patch) | |
tree | dc2b85e4ddd9b02e35729502dd6ae96fc2443568 /xmrstak/backend | |
parent | 610f4f0fa72c60daa14dc0661f58ba23563afb61 (diff) | |
download | xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.zip xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.tar.gz |
Implement pool-controlled nonce allocation
Diffstat (limited to 'xmrstak/backend')
-rw-r--r-- | xmrstak/backend/amd/minethd.cpp | 19 | ||||
-rw-r--r-- | xmrstak/backend/cpu/minethd.cpp | 36 | ||||
-rw-r--r-- | xmrstak/backend/globalStates.cpp | 7 | ||||
-rw-r--r-- | xmrstak/backend/globalStates.hpp | 27 | ||||
-rw-r--r-- | xmrstak/backend/iBackend.hpp | 30 | ||||
-rw-r--r-- | xmrstak/backend/miner_work.hpp | 7 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/minethd.cpp | 24 |
7 files changed, 85 insertions, 65 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 5ca10d3..048c3f0 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -167,13 +167,11 @@ void minethd::consume_work() void minethd::work_main() { uint64_t iCount = 0; - cryptonight_ctx* cpu_ctx; cpu_ctx = cpu::minethd::minethd_alloc_ctx(); cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); - + pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39); globalStates::inst().iConsumeCnt++; - uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39); while (bQuit == 0) { @@ -190,10 +188,8 @@ void minethd::work_main() continue; } - if(oWork.bNiceHash) - pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt); - else - pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt); + uint32_t h_per_round = pGpuCtx->rawIntensity; + size_t round_ctr = 0; assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); uint32_t target = oWork.iTarget32; @@ -201,6 +197,15 @@ void minethd::work_main() while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { + //Allocate a new nonce every 16 rounds + if((++round_ctr & 0xF) == 0) + { + if(oWork.bNiceHash) + pGpuCtx->Nonce = globalStates::inst().calc_start_nonce(pGpuCtx->Nonce & 0xFF000000u, h_per_round * 16); + else + pGpuCtx->Nonce = globalStates::inst().calc_start_nonce(0, h_per_round * 16); + } + cl_uint results[0x100]; memset(results,0,sizeof(cl_uint)*(0x100)); diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 625fbe4..f677459 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -335,25 +335,35 @@ void minethd::work_main() either because of network latency, or a socket problem. Since we are raison d'etre of this software it us sensible to just wait until we have something*/ - while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) std::this_thread::sleep_for(std::chrono::milliseconds(100)); consume_work(); continue; } + size_t nonce_ctr = 250; if(oWork.bNiceHash) - result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt); + result.iNonce = globalStates::inst().calc_start_nonce(*piNonce & 0xFF000000, 4096); else - result.iNonce = calc_start_nonce(oWork.iResumeCnt); + result.iNonce = globalStates::inst().calc_start_nonce(0, 4096); assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID)); - while(globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { if ((iCount & 0xF) == 0) //Store stats every 16 hashes { + if(--nonce_ctr == 0) + { + if(oWork.bNiceHash) + result.iNonce = globalStates::inst().calc_start_nonce(*piNonce & 0xFF000000, 4096); + else + result.iNonce = globalStates::inst().calc_start_nonce(0, 4096); + nonce_ctr = 250; + } + using namespace std::chrono; uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); iHashCount.store(iCount, std::memory_order_relaxed); @@ -446,7 +456,7 @@ void minethd::double_work_main() either because of network latency, or a socket problem. Since we are raison d'etre of this software it us sensible to just wait until we have something*/ - while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) std::this_thread::sleep_for(std::chrono::milliseconds(100)); consume_work(); @@ -456,17 +466,27 @@ void minethd::double_work_main() continue; } + size_t nonce_ctr = 250; if(oWork.bNiceHash) - iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt); + iNonce = globalStates::inst().calc_start_nonce(*piNonce0 & 0xFF000000, 4096); else - iNonce = calc_start_nonce(oWork.iResumeCnt); + iNonce = globalStates::inst().calc_start_nonce(0, 4096); assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); - while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { if ((iCount & 0x7) == 0) //Store stats every 16 hashes { + if(--nonce_ctr == 0) + { + if(oWork.bNiceHash) + iNonce = globalStates::inst().calc_start_nonce(*piNonce0 & 0xFF000000, 4096); + else + iNonce = globalStates::inst().calc_start_nonce(0, 4096); + nonce_ctr = 250; + } + using namespace std::chrono; uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); iHashCount.store(iCount, std::memory_order_relaxed); diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp index 9104040..78823c5 100644 --- a/xmrstak/backend/globalStates.cpp +++ b/xmrstak/backend/globalStates.cpp @@ -34,7 +34,7 @@ namespace xmrstak { -void globalStates::switch_work(miner_work& pWork) +void globalStates::switch_work(miner_work& pWork, pool_data& dat) { // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work // faster than threads can consume them. This should never happen in real life. @@ -43,6 +43,11 @@ void globalStates::switch_work(miner_work& pWork) while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount) std::this_thread::sleep_for(std::chrono::milliseconds(100)); + size_t xid = dat.pool_id; + dat.pool_id = pool_id; + pool_id = xid; + + dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst); oGlobalWork = pWork; iConsumeCnt.store(0, std::memory_order_seq_cst); iGlobalJobNo++; diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp index 73ccf74..2be06ae 100644 --- a/xmrstak/backend/globalStates.hpp +++ b/xmrstak/backend/globalStates.hpp @@ -2,6 +2,7 @@ #include "miner_work.hpp" #include "xmrstak/misc/environment.hpp" +#include "xmrstak/misc/console.hpp" #include <atomic> @@ -9,9 +10,18 @@ namespace xmrstak { -struct globalStates +struct pool_data { + uint32_t iSavedNonce; + size_t pool_id; + + pool_data() : iSavedNonce(0), pool_id(0) + { + } +}; +struct globalStates +{ static inline globalStates& inst() { auto& env = environment::inst(); @@ -20,19 +30,26 @@ struct globalStates return *env.pglobalStates; } - void switch_work(miner_work& pWork); + //pool_data is in-out winapi style + void switch_work(miner_work& pWork, pool_data& dat); + + inline uint32_t calc_start_nonce(uint32_t nicehash_nonce, uint32_t reserve_count) + { + uint32_t debug_nonce = nicehash_nonce | iGlobalNonce.fetch_add(reserve_count); + printer::inst()->print_msg(L1, "DEBUG: start_nonce assigned nh: %.8x rc: %.8x nonce: %.8x", nicehash_nonce, reserve_count, debug_nonce); + } miner_work oGlobalWork; std::atomic<uint64_t> iGlobalJobNo; std::atomic<uint64_t> iConsumeCnt; + std::atomic<uint32_t> iGlobalNonce; uint64_t iThreadCount; + size_t pool_id; - private: - +private: globalStates() : iThreadCount(0) { } - }; } // namepsace xmrstak diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp index 0be8f0a..ab964ce 100644 --- a/xmrstak/backend/iBackend.hpp +++ b/xmrstak/backend/iBackend.hpp @@ -9,38 +9,8 @@ namespace xmrstak { - // only allowed for unsigned value \todo add static assert - template<typename T> - T reverseBits(T value) - { - /* init with value (to get LSB) */ - T result = value; - /* extra shift needed at end */ - int s = sizeof(T) * CHAR_BIT - 1; - for (value >>= 1; value; value >>= 1) - { - result <<= 1; - result |= value & 1; - s--; - } - /* shift when values highest bits are zero */ - result <<= s; - return result; - } - struct iBackend { - inline uint32_t calc_start_nonce(uint32_t resume) - { - return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + globalStates::inst().iThreadCount * resume)); - } - - // Limited version of the nonce calc above - inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume) - { - return start | ( calc_start_nonce(resume) >> 8u ); - } - std::atomic<uint64_t> iHashCount; std::atomic<uint64_t> iTimestamp; uint32_t iThreadNo; diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp index aecbd70..6b5720c 100644 --- a/xmrstak/backend/miner_work.hpp +++ b/xmrstak/backend/miner_work.hpp @@ -15,7 +15,6 @@ namespace xmrstak char sJobID[64]; uint8_t bWorkBlob[112]; uint32_t iWorkSize; - uint32_t iResumeCnt; uint64_t iTarget; // \todo remove workaround needed for amd uint32_t iTarget32; @@ -25,8 +24,8 @@ namespace xmrstak miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { } - miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint32_t iResumeCnt, - uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize), iResumeCnt(iResumeCnt), + miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, + uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize), iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId) { assert(iWorkSize <= sizeof(bWorkBlob)); @@ -41,7 +40,6 @@ namespace xmrstak assert(this != &from); iWorkSize = from.iWorkSize; - iResumeCnt = from.iResumeCnt; iTarget = from.iTarget; iTarget32 = from.iTarget32; bNiceHash = from.bNiceHash; @@ -68,7 +66,6 @@ namespace xmrstak assert(this != &from); iWorkSize = from.iWorkSize; - iResumeCnt = from.iResumeCnt; iTarget = from.iTarget; iTarget32 = from.iTarget32; bNiceHash = from.bNiceHash; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index fcd01cd..6e157ab 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -192,11 +192,10 @@ void minethd::consume_work() void minethd::work_main() { uint64_t iCount = 0; - uint32_t iNonce; cryptonight_ctx* cpu_ctx; cpu_ctx = cpu::minethd::minethd_alloc_ctx(); cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); - uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39); + uint32_t iNonce = *(uint32_t*)(oWork.bWorkBlob + 39); globalStates::inst().iConsumeCnt++; @@ -222,16 +221,23 @@ void minethd::work_main() } cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize); - if(oWork.bNiceHash) - iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt); - else - iNonce = calc_start_nonce(oWork.iResumeCnt); + + uint32_t h_per_round = ctx.device_blocks * ctx.device_threads; + size_t round_ctr = 0; assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { - + //Allocate a new nonce every 16 rounds + if((++round_ctr & 0xF) == 0) + { + if(oWork.bNiceHash) + iNonce = globalStates::inst().calc_start_nonce(iNonce & 0xFF000000u, h_per_round * 16); + else + iNonce = globalStates::inst().calc_start_nonce(0, h_per_round * 16); + } + uint32_t foundNonce[10]; uint32_t foundCount; @@ -257,8 +263,8 @@ void minethd::work_main() executor::inst()->log_result_error("NVIDIA Invalid Result"); } - iCount += ctx.device_blocks * ctx.device_threads; - iNonce += ctx.device_blocks * ctx.device_threads; + iCount += h_per_round; + iNonce += h_per_round; using namespace std::chrono; uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); |