diff options
author | psychocrypt <psychocryptHPC@gmail.com> | 2018-05-01 20:46:02 +0200 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2018-06-04 21:07:11 +0000 |
commit | d68036dfe013c4949fdc636a5f60e599555fe2ca (patch) | |
tree | c9c3095f9a4e24fc54fba64392a109dc6141162a /xmrstak/backend/globalStates.cpp | |
parent | e13b0d27b1e3e31bf79342153bcc705d326b20b1 (diff) | |
download | xmr-stak-d68036dfe013c4949fdc636a5f60e599555fe2ca.zip xmr-stak-d68036dfe013c4949fdc636a5f60e599555fe2ca.tar.gz |
fix job consume (possible deadlock)
fix #1505
- fix possible deadlock of the executor thread
- fix race condition during the job consumption
- remove switch_work in all classes `minethd`
- move `consume_work` into `globalStates`
Diffstat (limited to 'xmrstak/backend/globalStates.cpp')
-rw-r--r-- | xmrstak/backend/globalStates.cpp | 69 |
1 file changed, 60 insertions, 9 deletions
diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp index 1ec7983..e60db8f 100644 --- a/xmrstak/backend/globalStates.cpp +++ b/xmrstak/backend/globalStates.cpp @@ -33,24 +33,75 @@ namespace xmrstak { +void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId) +{ + /* Only the executor thread which updates the job is ever setting iConsumeCnt + * to 1000. In this case each consumer must wait until the job is fully updated. + */ + uint64_t numConsumer = 0; + + /* Take care that we do not consume a job while the job is being updated. + * If we leave the loop we have increased iConsumeCnt so that + * the job will not be updated until we leave the method. + */ + do{ + numConsumer = iConsumeCnt.load(std::memory_order_relaxed); + if(numConsumer < 1000) + { + // register that this thread tries to consume job data + numConsumer = ++iConsumeCnt; + if(numConsumer >= 1000) + { + iConsumeCnt--; + // 11 is an arbitrarily chosen prime number + std::this_thread::sleep_for(std::chrono::milliseconds(11)); + } + } + else + { + // another thread is preparing a new job, 11 is an arbitrarily chosen prime number + std::this_thread::sleep_for(std::chrono::milliseconds(11)); + } + } + while(numConsumer >= 1000); + + threadWork = oGlobalWork; + currentJobId = iGlobalJobNo.load(std::memory_order_relaxed); + + // signal that this thread has consumed the work + iConsumeCnt--; +} void globalStates::switch_work(miner_work& pWork, pool_data& dat) { - // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work - // faster than threads can consume them. This should never happen in real life. - // Pool cant physically send jobs faster than every 250ms or so due to net latency. 
- - while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount) - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + /* 1000 is used to notify that the job will be updated as soon + * as all consumers (which are currently copying oGlobalWork) have + * copied all data. + */ + iConsumeCnt += 1000; + // wait until all threads which entered consume_work are finished + while (iConsumeCnt.load(std::memory_order_relaxed) > 1000) + { + // 7 is an arbitrarily chosen prime number which is smaller than the consumer waiting time + std::this_thread::sleep_for(std::chrono::milliseconds(7)); + } + // BEGIN CRITICAL SECTION + // this notifies all threads that the job has changed + iGlobalJobNo++; size_t xid = dat.pool_id; dat.pool_id = pool_id; pool_id = xid; + /* Maybe a worker thread is updating the nonce while we read it. + * In that case GPUs check the job ID after a nonce update and in the + * case that it is a CPU thread we have a small chance (max 6 nonces per CPU thread) + * that we recalculate a nonce after we reconnect to the current pool + */ + dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed); oGlobalWork = pWork; - iConsumeCnt.store(0, std::memory_order_seq_cst); - iGlobalJobNo++; + // END CRITICAL SECTION: allow job consumption + iConsumeCnt -= 1000; } } // namespace xmrstak |