Implement pool-controlled nonce allocation

author: fireice-uk <fireice-uk@users.noreply.github.com> 2017-10-13 16:35:10 +0100
committer: fireice-uk <fireice-uk@users.noreply.github.com> 2017-10-22 13:12:52 +0100
commit: f13f70c28eb9e4b38d3b4932f5845f1d5cc01906 (patch)
tree: dc2b85e4ddd9b02e35729502dd6ae96fc2443568 /xmrstak/backend
parent: 610f4f0fa72c60daa14dc0661f58ba23563afb61 (diff)
download: xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.zip
xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.tar.gz
7 files changed, 85 insertions, 65 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 5ca10d3..048c3f0 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -167,13 +167,11 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	
+	pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
 	globalStates::inst().iConsumeCnt++;
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
 	
 	while (bQuit == 0)
 	{
@@ -190,10 +188,8 @@ void minethd::work_main()
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+		uint32_t h_per_round = pGpuCtx->rawIntensity;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint32_t target = oWork.iTarget32;
@@ -201,6 +197,15 @@ void minethd::work_main()
 
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
+			//Allocate a new nonce every 16 rounds
+			if((++round_ctr & 0xF) == 0)
+			{
+				if(oWork.bNiceHash)
+					pGpuCtx->Nonce = globalStates::inst().calc_start_nonce(pGpuCtx->Nonce & 0xFF000000u, h_per_round * 16);
+				else
+					pGpuCtx->Nonce = globalStates::inst().calc_start_nonce(0, h_per_round * 16);
+			}
+
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));
 
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 625fbe4..f677459 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -335,25 +335,35 @@ void minethd::work_main()
 			    either because of network latency, or a socket problem. Since we are
 			    raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
 			continue;
 		}
 
+		size_t nonce_ctr = 250;
 		if(oWork.bNiceHash)
-			result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
+			result.iNonce = globalStates::inst().calc_start_nonce(*piNonce & 0xFF000000, 4096);
 		else
-			result.iNonce = calc_start_nonce(oWork.iResumeCnt);
+			result.iNonce = globalStates::inst().calc_start_nonce(0, 4096);
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID));
 
-		while(globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount & 0xF) == 0) //Store stats every 16 hashes
 			{
+				if(--nonce_ctr == 0)
+				{
+					if(oWork.bNiceHash)
+						result.iNonce = globalStates::inst().calc_start_nonce(*piNonce & 0xFF000000, 4096);
+					else
+						result.iNonce = globalStates::inst().calc_start_nonce(0, 4096);
+					nonce_ctr = 250;
+				}
+				
 				using namespace std::chrono;
 				uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
 				iHashCount.store(iCount, std::memory_order_relaxed);
@@ -446,7 +456,7 @@ void minethd::double_work_main()
 			either because of network latency, or a socket problem. Since we are
 			raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
@@ -456,17 +466,27 @@ void minethd::double_work_main()
 			continue;
 		}
 
+		size_t nonce_ctr = 250;
 		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt);
+			iNonce = globalStates::inst().calc_start_nonce(*piNonce0 & 0xFF000000, 4096);
 		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+			iNonce = globalStates::inst().calc_start_nonce(0, 4096);
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
-		while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount & 0x7) == 0) //Store stats every 16 hashes
 			{
+				if(--nonce_ctr == 0)
+				{
+					if(oWork.bNiceHash)
+						iNonce = globalStates::inst().calc_start_nonce(*piNonce0 & 0xFF000000, 4096);
+					else
+						iNonce = globalStates::inst().calc_start_nonce(0, 4096);
+					nonce_ctr = 250;
+				}
+
 				using namespace std::chrono;
 				uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
 				iHashCount.store(iCount, std::memory_order_relaxed);
diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 9104040..78823c5 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -34,7 +34,7 @@ namespace xmrstak
 {
 
 
-void globalStates::switch_work(miner_work& pWork)
+void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
 	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
 	// faster than threads can consume them. This should never happen in real life.
@@ -43,6 +43,11 @@ void globalStates::switch_work(miner_work& pWork)
 	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
 		std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
+	size_t xid = dat.pool_id;
+	dat.pool_id = pool_id;
+	pool_id = xid;
+
+	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
 	oGlobalWork = pWork;
 	iConsumeCnt.store(0, std::memory_order_seq_cst);
 	iGlobalJobNo++;
diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp
index 73ccf74..2be06ae 100644
--- a/xmrstak/backend/globalStates.hpp
+++ b/xmrstak/backend/globalStates.hpp
@@ -2,6 +2,7 @@
 
 #include "miner_work.hpp"
 #include "xmrstak/misc/environment.hpp"
+#include "xmrstak/misc/console.hpp"
 
 #include <atomic>
 
@@ -9,9 +10,18 @@
 namespace xmrstak
 {
 
-struct globalStates
+struct pool_data // per-pool values that globalStates::switch_work() trades with the live global ones
 {
+	uint32_t iSavedNonce; // exchanged with globalStates::iGlobalNonce inside switch_work()
+	size_t   pool_id; // swapped with globalStates::pool_id inside switch_work()
+ 
+	pool_data() : iSavedNonce(0), pool_id(0) // both fields start at zero
+	{
+	}
+};
 
+struct globalStates
+{
 	static inline globalStates& inst()
 	{
 		auto& env = environment::inst();
@@ -20,19 +30,35 @@ struct globalStates
 		return *env.pglobalStates;
 	}
 
-	void switch_work(miner_work& pWork);
+	//pool_data is in-out winapi style
+	void switch_work(miner_work& pWork, pool_data& dat);
+
+	// Reserves reserve_count consecutive nonces from the shared atomic counter
+	// and returns the first one OR-ed with nicehash_nonce. The callers in this
+	// patch pass either 0 or the job nonce masked with 0xFF000000.
+	// NOTE(review): the counter is not masked to 24 bits, so once it passes
+	// 0x00FFFFFF it overlaps the nicehash byte - confirm this is acceptable.
+	inline uint32_t calc_start_nonce(uint32_t nicehash_nonce, uint32_t reserve_count)
+	{
+		uint32_t debug_nonce = nicehash_nonce | iGlobalNonce.fetch_add(reserve_count);
+		printer::inst()->print_msg(L1, "DEBUG: start_nonce assigned nh: %.8x rc: %.8x nonce: %.8x", nicehash_nonce, reserve_count, debug_nonce);
+		// Flowing off the end of a non-void function is undefined behaviour,
+		// and the callers store this result as their working nonce.
+		return debug_nonce;
+	}
 
 	miner_work oGlobalWork;
 	std::atomic<uint64_t> iGlobalJobNo;
 	std::atomic<uint64_t> iConsumeCnt;
+	std::atomic<uint32_t> iGlobalNonce;
 	uint64_t iThreadCount;
+	size_t pool_id;
 
-	private:
-
-	globalStates() : iThreadCount(0)
+private:
+	// iGlobalNonce and pool_id are read by calc_start_nonce()/switch_work(), so they need defined start values
+	globalStates() : iGlobalNonce(0), iThreadCount(0), pool_id(0)
 	{
 	}
-	
 };
 
 } // namepsace xmrstak
diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp
index 0be8f0a..ab964ce 100644
--- a/xmrstak/backend/iBackend.hpp
+++ b/xmrstak/backend/iBackend.hpp
@@ -9,38 +9,8 @@
 
 namespace xmrstak
 {
-	// only allowed for unsigned value \todo add static assert
-	template<typename T>
-	T reverseBits(T value)
-	{
-		/* init with value (to get LSB) */
-		T result = value;
-		/* extra shift needed at end */
-		int s = sizeof(T) * CHAR_BIT - 1;
-		for (value >>= 1; value; value >>= 1)
-		{
-			result <<= 1;
-			result |= value & 1;
-			s--;
-		}
-		/* shift when values highest bits are zero */
-		result <<= s;
-		return result;
-	}
-
 	struct iBackend
 	{
-		inline uint32_t calc_start_nonce(uint32_t resume)
-		{
-			return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + globalStates::inst().iThreadCount * resume));
-		}
-
-		// Limited version of the nonce calc above
-		inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume)
-		{
-			return start | ( calc_start_nonce(resume) >> 8u );
-		}
-
 		std::atomic<uint64_t> iHashCount;
 		std::atomic<uint64_t> iTimestamp;
 		uint32_t iThreadNo;
diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp
index aecbd70..6b5720c 100644
--- a/xmrstak/backend/miner_work.hpp
+++ b/xmrstak/backend/miner_work.hpp
@@ -15,7 +15,6 @@ namespace xmrstak
 		char        sJobID[64];
 		uint8_t     bWorkBlob[112];
 		uint32_t    iWorkSize;
-		uint32_t    iResumeCnt;
 		uint64_t    iTarget;
 		// \todo remove workaround needed for amd
 		uint32_t    iTarget32;
@@ -25,8 +24,8 @@ namespace xmrstak
 
 		miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { }
 
-		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint32_t iResumeCnt,
-			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize), iResumeCnt(iResumeCnt),
+		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize,
+			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize),
 			iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId)
 		{
 			assert(iWorkSize <= sizeof(bWorkBlob));
@@ -41,7 +40,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
@@ -68,7 +66,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index fcd01cd..6e157ab 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -192,11 +192,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-	uint32_t iNonce;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
+	uint32_t iNonce = *(uint32_t*)(oWork.bWorkBlob + 39);
 
 	globalStates::inst().iConsumeCnt++;
 
@@ -222,16 +221,23 @@ void minethd::work_main()
 		}
 
 		cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize);
-		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+
+		uint32_t h_per_round = ctx.device_blocks * ctx.device_threads;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-
+			//Allocate a new nonce every 16 rounds
+			if((++round_ctr & 0xF) == 0)
+			{
+				if(oWork.bNiceHash)
+					iNonce = globalStates::inst().calc_start_nonce(iNonce & 0xFF000000u, h_per_round * 16);
+				else
+					iNonce = globalStates::inst().calc_start_nonce(0, h_per_round * 16);
+			}
+			
 			uint32_t foundNonce[10];
 			uint32_t foundCount;
 
@@ -257,8 +263,8 @@ void minethd::work_main()
 					executor::inst()->log_result_error("NVIDIA Invalid Result");
 			}
 
-			iCount += ctx.device_blocks * ctx.device_threads;
-			iNonce += ctx.device_blocks * ctx.device_threads;
+			iCount += h_per_round;
+			iNonce += h_per_round;
 
 			using namespace std::chrono;
 			uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
author	fireice-uk <fireice-uk@users.noreply.github.com>	2017-10-13 16:35:10 +0100
committer	fireice-uk <fireice-uk@users.noreply.github.com>	2017-10-22 13:12:52 +0100
commit	f13f70c28eb9e4b38d3b4932f5845f1d5cc01906 (patch)
tree	dc2b85e4ddd9b02e35729502dd6ae96fc2443568 /xmrstak/backend
parent	610f4f0fa72c60daa14dc0661f58ba23563afb61 (diff)
download	xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.zip xmr-stak-f13f70c28eb9e4b38d3b4932f5845f1d5cc01906.tar.gz