From 2346c8be20939fe9c34cee441ac11644ec43cc58 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 24 Oct 2017 21:24:37 +0200 Subject: add eon support to amd backend - add compile parameter to support aeon and xmr - update auto suggestion to handle aeon --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 26 ++++++++++++++++++++--- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 22 +++++++++---------- xmrstak/backend/amd/autoAdjust.hpp | 14 +++++++++++- xmrstak/backend/amd/minethd.cpp | 2 +- 4 files changed, 48 insertions(+), 16 deletions(-) (limited to 'xmrstak/backend/amd') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 3575854..845d32c 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -13,6 +13,9 @@ * along with this program. If not, see . */ +#include "../../cryptonight.hpp" +#include "../../../jconf.hpp" + #include #include #include @@ -245,8 +248,24 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } + size_t hashMemSize; + int threadMemMask; + int hasIterations; + if(::jconf::inst()->IsCurrencyXMR()) + { + hashMemSize = XMR_MEMORY; + threadMemMask = XMR_MASK; + hasIterations = XMR_ITER; + } + else + { + hashMemSize = AEON_MEMORY; + threadMemMask = AEON_MASK; + hasIterations = AEON_ITER; + } + size_t g_thd = ctx->rawIntensity; - ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, (1 << 21) * g_thd, NULL, &ret); + ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, hashMemSize * g_thd, NULL, &ret); if(ret != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret)); @@ -307,8 +326,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - char options[32]; - snprintf(options, sizeof(options), "-I. -DWORKSIZE=%llu", int_port(ctx->workSize)); + char options[256]; + snprintf(options, sizeof(options), + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu", hasIterations, threadMemMask, int_port(ctx->workSize)); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) { diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index a6a5910..966199b 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -91,7 +91,7 @@ static const __constant ulong keccakf_rndc[24] = 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; @@ -440,7 +440,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul if(gIdx < Threads) { states += 25 * gIdx; - Scratchpad += gIdx * (0x80000 >> 2); + Scratchpad += gIdx * (ITERATIONS >> 2); ((ulong8 *)State)[0] = vload8(0, input); State[8] = input[8]; @@ -482,7 +482,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul if(gIdx < Threads) { #pragma unroll 2 - for(int i = 0; i < 0x4000; ++i) + for(int i = 0; i < (ITERATIONS >> 5); ++i) { #pragma unroll for(int j = 0; j < 10; ++j) @@ -519,7 +519,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre if(gIdx < Threads) { states += 25 * gIdx; - Scratchpad += gIdx * (0x80000 >> 2); + Scratchpad += gIdx * (ITERATIONS >> 2); a[0] = states[0] ^ states[4]; b[0] = states[2] ^ states[6]; @@ -535,23 +535,23 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre if(gIdx < Threads) { #pragma unroll 8 - for(int i = 0; i < 0x80000; ++i) + for(int i = 0; i < ITERATIONS; ++i) { ulong c[2]; - ((uint4 *)c)[0] = Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)]; + ((uint4 *)c)[0] = Scratchpad[IDX((a[0] & MASK) >> 4)]; ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); //b_x ^= ((uint4 *)c)[0]; - Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)] = b_x ^ ((uint4 *)c)[0]; + Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0]; uint4 tmp; - tmp = Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)]; + tmp = Scratchpad[IDX((c[0] & MASK) >> 4)]; a[1] += c[0] * as_ulong2(tmp).s0; a[0] += mul_hi(c[0], as_ulong2(tmp).s0); - Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)] = ((uint4 *)a)[0]; + Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; ((uint4 *)a)[0] ^= tmp; @@ -588,7 +588,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u if(gIdx < Threads) { states += 25 * gIdx; - Scratchpad += gIdx * (0x80000 >> 2); + Scratchpad += gIdx * (ITERATIONS >> 2); #if defined(__Tahiti__) || defined(__Pitcairn__) @@ -611,7 +611,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u if(gIdx < Threads) { #pragma unroll 2 - for(int i = 0; i < 0x4000; ++i) + for(int i = 0; i < (ITERATIONS >> 5); ++i) { text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 2a22a08..01f279a 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -8,6 +8,8 @@ #include "xmrstak/misc/console.hpp" #include "xmrstak/misc/configEditor.hpp" #include "xmrstak/params.hpp" +#include "../cryptonight.hpp" +#include "../../jconf.hpp" #include #include @@ -81,6 +83,16 @@ private: constexpr size_t byteToMiB = 1024u * 1024u; + size_t hashMemSize; + if(::jconf::inst()->IsCurrencyXMR()) + { + hashMemSize = XMR_MEMORY; + } + else + { + hashMemSize = AEON_MEMORY; + } + std::string conf; int i = 0; for(auto& ctx : devVec) @@ -88,7 +100,7 @@ private: // keep 64MiB memory free (value is randomly chosen) size_t availableMem = ctx.freeMem - (64u * 1024 * 1024); // 224byte extra memory is used per thread for meta data - size_t perThread = (size_t(1u)<<21) + 224u; + size_t perThread = hashMemSize + 224u; size_t max_intensity = availableMem / perThread; // 1000 is a magic selected limit \todo select max intensity depending of the gpu type size_t possibleIntensity = std::min( size_t(1000u) , max_intensity ); diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index e262e0e..295ad31 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -183,7 +183,7 @@ void minethd::work_main() uint64_t iCount = 0; cryptonight_ctx* cpu_ctx; cpu_ctx = cpu::minethd::minethd_alloc_ctx(); - cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyXMR()); globalStates::inst().iConsumeCnt++; while (bQuit == 0) -- cgit v1.1