3 files changed, 16 insertions, 10 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index f9908cb..37adc5b 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -844,7 +844,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 		}
 	}*/
 
-	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+	size_t tmpNonce = ctx->Nonce;
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
 		return ERR_OCL_API;
@@ -897,7 +898,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 			BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
 			// number of global threads must be a multiple of the work group size (w_size)
 			assert(BranchNonces[i]%w_size == 0);
-			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+			size_t tmpNonce = ctx->Nonce;
+			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 			{
 				printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
 				return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 5ff7ea1..c2d708d 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -36,7 +36,7 @@ struct GpuContext
 	int computeUnits;
 	std::string name;
 
-	size_t Nonce;
+	uint32_t Nonce;
 
 };
 
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 5ca10d3..9d18860 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -167,13 +167,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	
 	globalStates::inst().iConsumeCnt++;
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
 	
 	while (bQuit == 0)
 	{
@@ -190,17 +187,24 @@ void minethd::work_main()
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+		uint32_t h_per_round = pGpuCtx->rawIntensity;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint32_t target = oWork.iTarget32;
 		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
 
+		if(oWork.bNiceHash)
+			pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
+			//Allocate a new nonce every 16 rounds
+			if((round_ctr++ & 0xF) == 0)
+			{
+				globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
+			}
+
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));