Diffstat (limited to 'xmrstak/backend')
-rw-r--r--   xmrstak/backend/amd/amd_gpu/gpu.cpp  |  6
-rw-r--r--   xmrstak/backend/amd/amd_gpu/gpu.hpp  |  2
-rw-r--r--   xmrstak/backend/amd/minethd.cpp      | 18
-rw-r--r--   xmrstak/backend/cpu/minethd.cpp      | 42
-rw-r--r--   xmrstak/backend/globalStates.cpp     |  7
-rw-r--r--   xmrstak/backend/globalStates.hpp     | 29
-rw-r--r--   xmrstak/backend/iBackend.hpp         | 30
-rw-r--r--   xmrstak/backend/miner_work.hpp       |  7
-rw-r--r--   xmrstak/backend/nvidia/minethd.cpp   | 24
9 files changed, 90 insertions, 75 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index f9908cb..37adc5b 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -844,7 +844,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 		}
 	}*/
 
-	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+	size_t tmpNonce = ctx->Nonce;
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
 		return ERR_OCL_API;
@@ -897,7 +898,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 		BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
 		// number of global threads must be a multiple of the work group size (w_size)
 		assert(BranchNonces[i]%w_size == 0);
-		if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+		size_t tmpNonce = ctx->Nonce;
+		if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 		{
 			printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
 			return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 5ff7ea1..c2d708d 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -36,7 +36,7 @@ struct GpuContext
 	int computeUnits;
 	std::string name;
 
-	size_t Nonce;
+	uint32_t Nonce;
 
 };
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 5ca10d3..9d18860 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -167,13 +167,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-
 	globalStates::inst().iConsumeCnt++;
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
 
 	while (bQuit == 0)
 	{
@@ -190,17 +187,24 @@ void minethd::work_main()
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+		uint32_t h_per_round = pGpuCtx->rawIntensity;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint32_t target = oWork.iTarget32;
 		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
 
+		if(oWork.bNiceHash)
+			pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
+			//Allocate a new nonce every 16 rounds
+			if((round_ctr++ & 0xF) == 0)
+			{
+				globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
+			}
+
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 625fbe4..ac80cc2 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -335,31 +335,36 @@ void minethd::work_main()
 			either because of network latency, or a socket problem. 
 			Since we are raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			result.iNonce = calc_start_nonce(oWork.iResumeCnt);
+		size_t nonce_ctr = 0;
+		constexpr size_t nonce_chunk = 4096; // Needs to be a power of 2
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID));
 
-		while(globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		if(oWork.bNiceHash)
+			result.iNonce = *piNonce;
+
+		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-			if ((iCount & 0xF) == 0) //Store stats every 16 hashes
+			if ((iCount++ & 0xF) == 0) //Store stats every 16 hashes
 			{
 				using namespace std::chrono;
 				uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
 				iHashCount.store(iCount, std::memory_order_relaxed);
 				iTimestamp.store(iStamp, std::memory_order_relaxed);
 			}
-			iCount++;
+
+			if((nonce_ctr++ & (nonce_chunk-1)) == 0)
+			{
+				globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
+			}
 
 			*piNonce = ++result.iNonce;
@@ -446,7 +451,7 @@ void minethd::double_work_main()
 			either because of network latency, or a socket problem. 
 			Since we are raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
@@ -456,14 +461,15 @@ void minethd::double_work_main()
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt);
-		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+		size_t nonce_ctr = 0;
+		constexpr size_t nonce_chunk = 4096; //Needs to be a power of 2
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
-		while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		if(oWork.bNiceHash)
+			iNonce = *piNonce0;
+
+		while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount & 0x7) == 0) //Store stats every 16 hashes
 			{
@@ -472,8 +478,14 @@ void minethd::double_work_main()
 				iHashCount.store(iCount, std::memory_order_relaxed);
 				iTimestamp.store(iStamp, std::memory_order_relaxed);
 			}
-
 			iCount += 2;
+
+
+			if((nonce_ctr++ & (nonce_chunk/2 - 1)) == 0)
+			{
+				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk);
+			}
+
 			*piNonce0 = ++iNonce;
 			*piNonce1 = ++iNonce;
diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 9104040..78823c5 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -34,7 +34,7 @@ namespace xmrstak
 {
 
-void globalStates::switch_work(miner_work& pWork)
+void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
 	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
 	// faster than threads can consume them. This should never happen in real life.
@@ -43,6 +43,11 @@ void globalStates::switch_work(miner_work& pWork)
 	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
 		std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
+	size_t xid = dat.pool_id;
+	dat.pool_id = pool_id;
+	pool_id = xid;
+
+	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
 	oGlobalWork = pWork;
 	iConsumeCnt.store(0, std::memory_order_seq_cst);
 	iGlobalJobNo++;
diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp
index 73ccf74..1c28d5c 100644
--- a/xmrstak/backend/globalStates.hpp
+++ b/xmrstak/backend/globalStates.hpp
@@ -2,6 +2,7 @@
 
 #include "miner_work.hpp"
 #include "xmrstak/misc/environment.hpp"
+#include "xmrstak/misc/console.hpp"
 
 #include <atomic>
 
@@ -9,9 +10,18 @@ namespace xmrstak
 {
 
-struct globalStates
+struct pool_data
 {
+	uint32_t iSavedNonce;
+	size_t pool_id;
+
+	pool_data() : iSavedNonce(0), pool_id(0)
+	{
+	}
+};
+struct globalStates
+{
 	static inline globalStates& inst()
 	{
 		auto& env = environment::inst();
@@ -20,19 +30,28 @@ struct globalStates
 		return *env.pglobalStates;
 	}
 
-	void switch_work(miner_work& pWork);
+	//pool_data is in-out winapi style
+	void switch_work(miner_work& pWork, pool_data& dat);
+
+	inline void calc_start_nonce(uint32_t& nonce, bool use_nicehash, uint32_t reserve_count)
+	{
+		if(use_nicehash)
+			nonce = (nonce & 0xFF000000) | iGlobalNonce.fetch_add(reserve_count);
+		else
+			nonce = iGlobalNonce.fetch_add(reserve_count);
+	}
 
 	miner_work oGlobalWork;
 	std::atomic<uint64_t> iGlobalJobNo;
 	std::atomic<uint64_t> iConsumeCnt;
+	std::atomic<uint32_t> iGlobalNonce;
 	uint64_t iThreadCount;
+	size_t pool_id;
 
-	private:
-
+private:
 	globalStates() : iThreadCount(0)
 	{
 	}
-
 };
 
 } // namepsace xmrstak
diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp
index 0be8f0a..ab964ce 100644
--- a/xmrstak/backend/iBackend.hpp
+++ b/xmrstak/backend/iBackend.hpp
@@ -9,38 +9,8 @@
 namespace xmrstak
 {
-	// only allowed for unsigned value \todo add static assert
-	template<typename T>
-	T reverseBits(T value)
-	{
-		/* init with value (to get LSB) */
-		T result = value;
-		/* extra shift needed at end */
-		int s = sizeof(T) * CHAR_BIT - 1;
-		for (value >>= 1; value; value >>= 1)
-		{
-			result <<= 1;
-			result |= value & 1;
-			s--;
-		}
-		/* shift when values highest bits are zero */
-		result <<= s;
-		return result;
-	}
-
 	struct iBackend
 	{
-		inline uint32_t calc_start_nonce(uint32_t resume)
-		{
-			return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + globalStates::inst().iThreadCount * resume));
-		}
-
-		// Limited version of the nonce calc above
-		inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume)
-		{
-			return start | ( calc_start_nonce(resume) >> 8u );
-		}
-
 		std::atomic<uint64_t> iHashCount;
 		std::atomic<uint64_t> iTimestamp;
 		uint32_t iThreadNo;
diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp
index aecbd70..6b5720c 100644
--- a/xmrstak/backend/miner_work.hpp
+++ b/xmrstak/backend/miner_work.hpp
@@ -15,7 +15,6 @@ namespace xmrstak
 		char sJobID[64];
 		uint8_t bWorkBlob[112];
 		uint32_t iWorkSize;
-		uint32_t iResumeCnt;
 		uint64_t iTarget;
 		// \todo remove workaround needed for amd
 		uint32_t iTarget32;
@@ -25,8 +24,8 @@ namespace xmrstak
 
 		miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { }
 
-		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint32_t iResumeCnt,
-			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize), iResumeCnt(iResumeCnt),
+		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize,
+			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize),
 			iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId)
 		{
 			assert(iWorkSize <= sizeof(bWorkBlob));
@@ -41,7 +40,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
@@ -68,7 +66,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index fcd01cd..7549c86 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -192,11 +192,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-	uint32_t iNonce;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
+	uint32_t iNonce;
 
 	globalStates::inst().iConsumeCnt++;
 
@@ -222,16 +221,23 @@ void minethd::work_main()
 		}
 		cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize);
 
-		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+
+		uint32_t h_per_round = ctx.device_blocks * ctx.device_threads;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
+		if(oWork.bNiceHash)
+			iNonce = *(uint32_t*)(oWork.bWorkBlob + 39);
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-
+			//Allocate a new nonce every 16 rounds
+			if((round_ctr++ & 0xF) == 0)
+			{
+				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, h_per_round * 16);
+			}
+
 			uint32_t foundNonce[10];
 			uint32_t foundCount;
@@ -257,8 +263,8 @@ void minethd::work_main()
 				executor::inst()->log_result_error("NVIDIA Invalid Result");
 			}
 
-			iCount += ctx.device_blocks * ctx.device_threads;
-			iNonce += ctx.device_blocks * ctx.device_threads;
+			iCount += h_per_round;
+			iNonce += h_per_round;
 
 			using namespace std::chrono;
 			uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
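
Note on the scheme this commit introduces: the per-thread reverse-bit nonce ranges (reverseBits/calc_start_nonce in iBackend.hpp) are replaced by one shared atomic counter in globalStates. Each backend thread now reserves a chunk of nonces with a fetch_add and only comes back for more once the chunk is used up (every 4096 hashes on CPU, every 16 kernel rounds on GPU), and switch_work exchanges the counter with pool_data::iSavedNonce so a later switch back to the interrupted pool resumes where it left off. The following standalone sketch only illustrates the reservation idea: calc_start_nonce mirrors the diff, but the nonce_pool struct, worker loop, thread count and chunk size are assumptions made for the example, not xmr-stak code.

    #include <atomic>
    #include <cstdint>
    #include <cstdio>
    #include <thread>
    #include <vector>

    // Shared nonce pool, modeled on the globalStates change in this diff.
    struct nonce_pool
    {
        std::atomic<uint32_t> iGlobalNonce{0};

        // Same logic as globalStates::calc_start_nonce: in NiceHash mode the
        // pool-assigned top byte of the nonce is preserved and the shared
        // counter supplies the remaining bits.
        void calc_start_nonce(uint32_t& nonce, bool use_nicehash, uint32_t reserve_count)
        {
            if(use_nicehash)
                nonce = (nonce & 0xFF000000) | iGlobalNonce.fetch_add(reserve_count);
            else
                nonce = iGlobalNonce.fetch_add(reserve_count);
        }
    };

    // Illustrative worker: reserve a fresh chunk whenever the previous one is
    // exhausted (cf. the 4096-nonce CPU chunk and h_per_round * 16 GPU chunk).
    void worker(nonce_pool& pool, bool nicehash, uint32_t start_nonce, int rounds)
    {
        constexpr uint32_t chunk = 4096; // must be a power of two, as in the diff
        uint32_t nonce = start_nonce;    // NiceHash start nonce from the work blob

        for(int r = 0; r < rounds; ++r)
        {
            pool.calc_start_nonce(nonce, nicehash, chunk);
            for(uint32_t i = 0; i < chunk; ++i)
                ++nonce; // the real code writes this into the blob and hashes it
        }
    }

    int main()
    {
        nonce_pool pool;
        std::vector<std::thread> threads;

        // Several threads share one counter, so their reserved ranges never overlap.
        for(int i = 0; i < 4; ++i)
            threads.emplace_back(worker, std::ref(pool), /*nicehash*/ false, 0u, 4);

        for(auto& t : threads)
            t.join();

        printf("nonces handed out: %u\n", pool.iGlobalNonce.load());
        return 0;
    }

Compared with the old reverse-bit split, chunked reservation keeps every thread's range contiguous and lets threads of very different speeds (CPU cores vs. whole GPUs) draw nonces from the same 32-bit space without colliding.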