From 95cc462d45dbcda08e32f43c53384b00028ad256 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Tue, 1 May 2018 20:46:02 +0200
Subject: fix job consume (possible deadlock)

fix #1505

- fix possible deadlock of the executor thread
- fix racecondition during the job consumation
- remove switch_work in all classes `minethd`
- move `consume_work` into `globalStates`
---
 xmrstak/backend/globalStates.cpp | 69 ++++++++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 9 deletions(-)

(limited to 'xmrstak/backend/globalStates.cpp')

diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 1ec7983..e60db8f 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -33,24 +33,75 @@
 namespace xmrstak
 {
 
+void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
+{
+	/* Only the executer thread which updates the job is ever setting iConsumeCnt
+	 * to 1000. In this case each consumer must wait until the job is fully updated.
+	 */
+	uint64_t numConsumer = 0;
+
+	/* Take care that we not consume a job if the job is updated.
+	 * If we leave the loop we have increased iConsumeCnt so that
+	 * the job will not be updated until we leave the method.
+	 */
+	do{
+		numConsumer = iConsumeCnt.load(std::memory_order_relaxed);
+		if(numConsumer < 1000)
+		{
+			// register that thread try consume job data
+			numConsumer = ++iConsumeCnt;
+			if(numConsumer >= 1000)
+			{
+				iConsumeCnt--;
+				// 11 is a arbitrary chosen prime number
+				std::this_thread::sleep_for(std::chrono::milliseconds(11));
+			}
+		}
+		else
+		{
+			// an other thread is preparing a new job, 11 is a arbitrary chosen prime number
+			std::this_thread::sleep_for(std::chrono::milliseconds(11));
+		}
+	}
+	while(numConsumer >= 1000);
+
+	threadWork = oGlobalWork;
+	currentJobId = iGlobalJobNo.load(std::memory_order_relaxed);
+	
+	// signal that thread consumed work
+	iConsumeCnt--;
+}
 
 void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
-	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
-	// faster than threads can consume them. This should never happen in real life.
-	// Pool cant physically send jobs faster than every 250ms or so due to net latency.
-
-	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
-		std::this_thread::sleep_for(std::chrono::milliseconds(100));
+	/* 1000 is used to notify that the the job will be updated as soon
+	 * as all consumer (which currently coping oGlobalWork has copied
+	 * all data)
+	 */
+	iConsumeCnt += 1000;
+	// wait until all threads which entered consume_work are finished
+	while (iConsumeCnt.load(std::memory_order_relaxed) > 1000)
+	{
+		// 7 is a arbitrary chosen prime number which is smaller than the consumer waiting time
+		std::this_thread::sleep_for(std::chrono::milliseconds(7));
+	}
+	// BEGIN CRITICAL SECTION
+	// this notifies all threads that the job has changed
+	iGlobalJobNo++; 
 
 	size_t xid = dat.pool_id;
 	dat.pool_id = pool_id;
 	pool_id = xid;
 
-	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
+	/* Maybe a worker thread is updating the nonce while we read it.
+	 * In that case GPUs check the job ID after a nonce update and in the
+	 * case that it is a CPU thread we have a small chance (max 6 nonces per CPU thread)
+	 * that we recalculate a nonce after we reconnect to the current pool
+	 */
+	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
-	iConsumeCnt.store(0, std::memory_order_seq_cst);
-	iGlobalJobNo++;
+	// END CRITICAL SECTION: allow job consume
+	iConsumeCnt -= 1000;
 }
 
 } // namespace xmrstak
-- 
cgit v1.1


From 154f2ded6cd11570c9cc2ea51919da08ec524077 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Thu, 3 May 2018 20:35:56 +0200
Subject: add read write lock class

add log class from Will Zhang:

Package: cpputil
Source: https://github.com/willzhang4a58/cpputil
License: MIT License
---
 xmrstak/backend/globalStates.cpp | 51 +++++-----------------------------------
 1 file changed, 6 insertions(+), 45 deletions(-)

(limited to 'xmrstak/backend/globalStates.cpp')

diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index e60db8f..6058b7a 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -35,57 +35,18 @@ namespace xmrstak
 
 void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
 {
-	/* Only the executer thread which updates the job is ever setting iConsumeCnt
-	 * to 1000. In this case each consumer must wait until the job is fully updated.
-	 */
-	uint64_t numConsumer = 0;
-
-	/* Take care that we not consume a job if the job is updated.
-	 * If we leave the loop we have increased iConsumeCnt so that
-	 * the job will not be updated until we leave the method.
-	 */
-	do{
-		numConsumer = iConsumeCnt.load(std::memory_order_relaxed);
-		if(numConsumer < 1000)
-		{
-			// register that thread try consume job data
-			numConsumer = ++iConsumeCnt;
-			if(numConsumer >= 1000)
-			{
-				iConsumeCnt--;
-				// 11 is a arbitrary chosen prime number
-				std::this_thread::sleep_for(std::chrono::milliseconds(11));
-			}
-		}
-		else
-		{
-			// an other thread is preparing a new job, 11 is a arbitrary chosen prime number
-			std::this_thread::sleep_for(std::chrono::milliseconds(11));
-		}
-	}
-	while(numConsumer >= 1000);
+	jobLock.rdlock();
 
 	threadWork = oGlobalWork;
 	currentJobId = iGlobalJobNo.load(std::memory_order_relaxed);
 	
-	// signal that thread consumed work
-	iConsumeCnt--;
+	jobLock.unlock();
 }
 
 void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
-	/* 1000 is used to notify that the the job will be updated as soon
-	 * as all consumer (which currently coping oGlobalWork has copied
-	 * all data)
-	 */
-	iConsumeCnt += 1000;
-	// wait until all threads which entered consume_work are finished
-	while (iConsumeCnt.load(std::memory_order_relaxed) > 1000)
-	{
-		// 7 is a arbitrary chosen prime number which is smaller than the consumer waiting time
-		std::this_thread::sleep_for(std::chrono::milliseconds(7));
-	}
-	// BEGIN CRITICAL SECTION
+	jobLock.wrlock();
+	
 	// this notifies all threads that the job has changed
 	iGlobalJobNo++; 
 
@@ -100,8 +61,8 @@ void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 	 */
 	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
-	// END CRITICAL SECTION: allow job consume
-	iConsumeCnt -= 1000;
+	
+	jobLock.unlock();
 }
 
 } // namespace xmrstak
-- 
cgit v1.1


From 0f96f51ca5f4238cdbeeaf243de8ae4f428901b1 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Thu, 3 May 2018 20:40:49 +0200
Subject: use read write locks to secure job updates

user read write locks to be sure that no job is consumend during the job update
---
 xmrstak/backend/globalStates.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'xmrstak/backend/globalStates.cpp')

diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 6058b7a..8de6bfe 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -35,17 +35,17 @@ namespace xmrstak
 
 void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
 {
-	jobLock.rdlock();
+	jobLock.ReadLock();
 
 	threadWork = oGlobalWork;
 	currentJobId = iGlobalJobNo.load(std::memory_order_relaxed);
 	
-	jobLock.unlock();
+	jobLock.UnLock();
 }
 
 void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
-	jobLock.wrlock();
+	jobLock.WriteLock();
 	
 	// this notifies all threads that the job has changed
 	iGlobalJobNo++; 
@@ -62,7 +62,7 @@ void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
 	
-	jobLock.unlock();
+	jobLock.UnLock();
 }
 
 } // namespace xmrstak
-- 
cgit v1.1


From 460aa90d252645a6351f8557b564fcd5686ddcc1 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Fri, 4 May 2018 20:18:48 +0200
Subject: github annotations

- reformat `read_write_lock.h`
- fix spelling issue
- move job id increase of the write to the buttom
---
 xmrstak/backend/globalStates.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'xmrstak/backend/globalStates.cpp')

diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 8de6bfe..9f41be2 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -46,9 +46,6 @@ void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
 void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
 	jobLock.WriteLock();
-	
-	// this notifies all threads that the job has changed
-	iGlobalJobNo++; 
 
 	size_t xid = dat.pool_id;
 	dat.pool_id = pool_id;
@@ -61,6 +58,9 @@ void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 	 */
 	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
+
+	// this notifies all threads that the job has changed
+	iGlobalJobNo++;
 	
 	jobLock.UnLock();
 }
-- 
cgit v1.1


From 0d85a32dce63bb8c735fd8a8ab0e71b474b9b399 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Tue, 22 May 2018 23:05:46 +0200
Subject: fix duplicated nonce usage

- avoid that a nonce which not fits to the current job is used (check jobId after start nonce is consumed)
- move jobId check into the if condition to get a new bunch of nonces
- CPU: add jobId validation after the start nonce is consumed
---
 xmrstak/backend/globalStates.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'xmrstak/backend/globalStates.cpp')

diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 9f41be2..3bd7d0e 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -47,20 +47,21 @@ void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
 	jobLock.WriteLock();
 
+	/* This notifies all threads that the job has changed.
+	 * To avoid duplicated shared this must be done before the nonce is exchanged.
+	 */
+	iGlobalJobNo++;
+	
 	size_t xid = dat.pool_id;
 	dat.pool_id = pool_id;
 	pool_id = xid;
 
 	/* Maybe a worker thread is updating the nonce while we read it.
-	 * In that case GPUs check the job ID after a nonce update and in the
-	 * case that it is a CPU thread we have a small chance (max 6 nonces per CPU thread)
-	 * that we recalculate a nonce after we reconnect to the current pool
+	 * To avoid duplicated share calculations the job ID is checked in the worker thread
+	 * after the nonce is read.
 	 */
 	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
-
-	// this notifies all threads that the job has changed
-	iGlobalJobNo++;
 	
 	jobLock.UnLock();
 }
-- 
cgit v1.1