add eon support to amd backend

- add compile parameter to support aeon and xmr - update auto suggestion to handle aeon
author: psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-24 21:24:37 +0200
committer: psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-27 20:12:01 +0200
commit: 2346c8be20939fe9c34cee441ac11644ec43cc58 (patch)
tree: 5c8c23365a3f1417854f4c5b92ef07e55c077a20 /xmrstak
parent: e0f0ad4d03af36d302485e46e22f47edba96b40d (diff)
download: xmr-stak-2346c8be20939fe9c34cee441ac11644ec43cc58.zip
xmr-stak-2346c8be20939fe9c34cee441ac11644ec43cc58.tar.gz
4 files changed, 48 insertions, 16 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 3575854..845d32c 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -13,6 +13,9 @@
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
 
+#include "../../cryptonight.hpp"
+#include "../../../jconf.hpp"
+
 #include <stdio.h>
 #include <string.h>
 #include <math.h>
@@ -245,8 +248,24 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		return ERR_OCL_API;
 	}
 
+	size_t hashMemSize;
+	int threadMemMask;
+	int hasIterations;
+	if(::jconf::inst()->IsCurrencyXMR())
+	{
+		hashMemSize = XMR_MEMORY;
+		threadMemMask = XMR_MASK;
+		hasIterations = XMR_ITER;
+	}
+	else
+	{
+		hashMemSize = AEON_MEMORY;
+		threadMemMask = AEON_MASK;
+		hasIterations = AEON_ITER;
+	}
+
 	size_t g_thd = ctx->rawIntensity;
-	ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, (1 << 21) * g_thd, NULL, &ret);
+	ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, hashMemSize * g_thd, NULL, &ret);
 	if(ret != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret));
@@ -307,8 +326,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		return ERR_OCL_API;
 	}
 
-	char options[32];
-	snprintf(options, sizeof(options), "-I. -DWORKSIZE=%llu", int_port(ctx->workSize));
+	char options[256];
+	snprintf(options, sizeof(options), 
+		"-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu", hasIterations, threadMemMask, int_port(ctx->workSize));
 	ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL);
 	if(ret != CL_SUCCESS)
 	{
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index a6a5910..966199b 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -91,7 +91,7 @@ static const __constant ulong keccakf_rndc[24] =
     0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
     0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
     0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-    0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 
+    0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
     0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
     0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 };
@@ -440,7 +440,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
 	if(gIdx < Threads)
 	{
 		states += 25 * gIdx;
-		Scratchpad += gIdx * (0x80000 >> 2);
+		Scratchpad += gIdx * (ITERATIONS >> 2);
 
 		((ulong8 *)State)[0] = vload8(0, input);
 		State[8] = input[8];
@@ -482,7 +482,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
 	if(gIdx < Threads)
 	{
 		#pragma unroll 2
-		for(int i = 0; i < 0x4000; ++i)
+		for(int i = 0; i < (ITERATIONS >> 5); ++i)
 		{
 			#pragma unroll
 			for(int j = 0; j < 10; ++j)
@@ -519,7 +519,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 	if(gIdx < Threads)
 	{
 		states += 25 * gIdx;
-		Scratchpad += gIdx * (0x80000 >> 2);
+		Scratchpad += gIdx * (ITERATIONS >> 2);
 
 		a[0] = states[0] ^ states[4];
 		b[0] = states[2] ^ states[6];
@@ -535,23 +535,23 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 	if(gIdx < Threads)
 	{
 		#pragma unroll 8
-		for(int i = 0; i < 0x80000; ++i)
+		for(int i = 0; i < ITERATIONS; ++i)
 		{
 			ulong c[2];
 
-			((uint4 *)c)[0] = Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)];
+			((uint4 *)c)[0] = Scratchpad[IDX((a[0] & MASK) >> 4)];
 			((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
 			//b_x ^= ((uint4 *)c)[0];
 
-			Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)] = b_x ^ ((uint4 *)c)[0];
+			Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0];
 
 			uint4 tmp;
-			tmp = Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)];
+			tmp = Scratchpad[IDX((c[0] & MASK) >> 4)];
 
 			a[1] += c[0] * as_ulong2(tmp).s0;
 			a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
 
-			Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)] = ((uint4 *)a)[0];
+			Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
 
 			((uint4 *)a)[0] ^= tmp;
 
@@ -588,7 +588,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 	if(gIdx < Threads)
 	{
 		states += 25 * gIdx;
-		Scratchpad += gIdx * (0x80000 >> 2);
+		Scratchpad += gIdx * (ITERATIONS >> 2);
 
 		#if defined(__Tahiti__) || defined(__Pitcairn__)
 
@@ -611,7 +611,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 	if(gIdx < Threads)
 	{
 		#pragma unroll 2
-		for(int i = 0; i < 0x4000; ++i)
+		for(int i = 0; i < (ITERATIONS >> 5); ++i)
 		{
 			text ^= Scratchpad[IDX((i << 3) + get_local_id(1))];
 
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 2a22a08..01f279a 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -8,6 +8,8 @@
 #include "xmrstak/misc/console.hpp"
 #include "xmrstak/misc/configEditor.hpp"
 #include "xmrstak/params.hpp"
+#include "../cryptonight.hpp"
+#include "../../jconf.hpp"
 
 #include <vector>
 #include <cstdio>
@@ -81,6 +83,16 @@ private:
 
 		constexpr size_t byteToMiB = 1024u * 1024u;
 
+		size_t hashMemSize;
+		if(::jconf::inst()->IsCurrencyXMR())
+		{
+			hashMemSize = XMR_MEMORY;
+		}
+		else
+		{
+			hashMemSize = AEON_MEMORY;
+		}
+
 		std::string conf;
         int i = 0;
         for(auto& ctx : devVec)
@@ -88,7 +100,7 @@ private:
 			// keep 64MiB memory free (value is randomly chosen)
 			size_t availableMem = ctx.freeMem - (64u * 1024 * 1024);
 			// 224byte extra memory is used per thread for meta data
-			size_t perThread = (size_t(1u)<<21) + 224u;
+			size_t perThread = hashMemSize + 224u;
 			size_t max_intensity = availableMem / perThread;
 			// 1000 is a magic selected limit \todo select max intensity depending of the gpu type
 			size_t possibleIntensity = std::min( size_t(1000u) , max_intensity );
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index e262e0e..295ad31 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -183,7 +183,7 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
+	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyXMR());
 	globalStates::inst().iConsumeCnt++;
 	
 	while (bQuit == 0)
author	psychocrypt <psychocrypt@users.noreply.github.com>	2017-10-24 21:24:37 +0200
committer	psychocrypt <psychocrypt@users.noreply.github.com>	2017-10-27 20:12:01 +0200
commit	2346c8be20939fe9c34cee441ac11644ec43cc58 (patch)
tree	5c8c23365a3f1417854f4c5b92ef07e55c077a20 /xmrstak
parent	e0f0ad4d03af36d302485e46e22f47edba96b40d (diff)
download	xmr-stak-2346c8be20939fe9c34cee441ac11644ec43cc58.zip xmr-stak-2346c8be20939fe9c34cee441ac11644ec43cc58.tar.gz