From 611378eb968eeda5700fe257a27d87856aa112f0 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocrypt@users.noreply.github.com>
Date: Fri, 6 Oct 2017 21:46:22 +0200
Subject: avoid possible illegal memory access

It is possible that the number of results exceeds the size of the result output array;
in this case invalid memory can be accessed within the device and on the host side.
---
 xmrstak/backend/amd/amd_gpu/gpu.cpp               |  4 +++
 xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 35 +++++++++++++++++------
 2 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'xmrstak/backend/amd')

diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index d43961e..ca81718 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -912,6 +912,10 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 	}
 
 	clFinish(ctx->CommandQueues);
+	auto & numHashValues = HashOutput[0xFF];
+	// avoid out of memory read, we have only storage for 0xFF results
+	if(numHashValues > 0xFF)
+		numHashValues = 0xFF;
 	ctx->Nonce += g_intensity;
 
 	return ERR_SUCCESS;
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index 4fb8b0d..9c65425 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -552,7 +552,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 
 	states += 25 * gIdx;
 	Scratchpad += gIdx * (0x80000 >> 2);
-	
+
 	for(int i = get_local_id(0); i < 256; i += WORKSIZE)
 	{
 		const uint tmp = AES0_C[i];
@@ -562,7 +562,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 		AES3[i] = rotate(tmp, 24U);
 	}
 	barrier(CLK_LOCAL_MEM_FENCE);
-	
+
 	#if defined(__Tahiti__) || defined(__Pitcairn__)
 	
 	for(int i = 0; i < 4; ++i) ((ulong *)ExpandedKey2)[i] = states[i + 4];
@@ -677,8 +677,12 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
 	
 	//vstore8(p, 0, output);
 	
-	if(as_uint16(p).s7 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0);
-	
+	if(as_uint16(p).s7 <= Target)
+	{
+		ulong outIdx = atomic_inc(output + 0xFF);
+		if(outIdx < 0xFF)
+			output[outIdx] = BranchBuf[idx] + get_global_offset(0);
+	}
 	mem_fence(CLK_GLOBAL_MEM_FENCE);	
 }
 
@@ -743,7 +747,12 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
 	//output[2] = h7h;
 	//output[3] = h7l;
 	
-	if(as_uint2(h7l).s1 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0);
+	if(as_uint2(h7l).s1 <= Target)
+	{
+		ulong outIdx = atomic_inc(output + 0xFF);
+		if(outIdx < 0xFF)
+			output[outIdx] = BranchBuf[idx] + get_global_offset(0);
+	}
 }
 
 #define SWAP4(x)	as_uint(as_uchar4(x).s3210)
@@ -811,7 +820,12 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
 	for(int i = 0; i < 8; ++i) h[i] = SWAP4(h[i]);
 	
 	//for(int i = 0; i < 4; ++i) output[i] = ((ulong *)h)[i];
-	if(h[7] <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0);
+	if(h[7] <= Target)
+	{
+		ulong outIdx = atomic_inc(output + 0xFF);
+		if(outIdx < 0xFF)
+			output[outIdx] = BranchBuf[idx] + get_global_offset(0);
+	}
 }
 
 __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads)
@@ -863,7 +877,12 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
 	for(int i = 0; i < 8; ++i) State[i] ^= tmp[i];
 	
 	//for(int i = 0; i < 4; ++i) output[i] = State[i + 4];
-	if(as_uint2(State[7]).s1 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0);
+	if(as_uint2(State[7]).s1 <= Target)
+	{
+		ulong outIdx = atomic_inc(output + 0xFF);
+		if(outIdx < 0xFF)
+			output[outIdx] = BranchBuf[idx] + get_global_offset(0);
+	}
 }
 
-)==="
\ No newline at end of file
+)==="
-- 
cgit v1.1