summary | refs | log | tree | commit | diff | stats
path: root/xmrstak/backend/amd
diff options
context:
space:
mode:
author psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-24 23:22:28 +0200
committer psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-24 23:22:28 +0200
commit 8888a608987c85870a99bfa99bd6980018288666 (patch)
tree 1d61261de24917fd46e74d91690cc70d06958052 /xmrstak/backend/amd
parent 712f7b7bdb02d05aaffc5f70817aeb1edd17a2b3 (diff)
download xmr-stak-8888a608987c85870a99bfa99bd6980018288666.zip
xmr-stak-8888a608987c85870a99bfa99bd6980018288666.tar.gz
amd: use 64bit target
- remove 32bit target value
- always use 64bit target for amd backend
Diffstat (limited to 'xmrstak/backend/amd')
-rw-r--r-- xmrstak/backend/amd/amd_gpu/gpu.cpp 4
-rw-r--r-- xmrstak/backend/amd/amd_gpu/gpu.hpp 2
-rw-r--r-- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl 25
-rw-r--r-- xmrstak/backend/amd/minethd.cpp 2
4 files changed, 21 insertions, 12 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 37adc5b..3575854 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -645,7 +645,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
return ERR_SUCCESS;
}
-size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target)
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target)
{
cl_int ret;
@@ -787,7 +787,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t tar
}
// Target
- if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_uint), &target)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index c2d708d..123de01 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -45,7 +45,7 @@ int getAMDPlatformIdx();
std::vector<GpuContext> getAMDDevices(int index);
size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
-size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target);
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target);
size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput);
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index a1642c4..a6a5910 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -665,7 +665,7 @@ R"===(
#define VSWAP4(x) ((((x) >> 24) & 0xFFU) | (((x) >> 8) & 0xFF00U) | (((x) << 8) & 0xFF0000U) | (((x) << 24) & 0xFF000000U))
-__kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads)
+__kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
{
const ulong idx = get_global_id(0) - get_global_offset(0);
@@ -713,7 +713,9 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
//vstore8(p, 0, output);
- if(as_uint16(p).s7 <= Target)
+ // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
+ // and expect an accurate result for target > 32-bit without implementing carries
+ if(p.s3 <= Target)
{
ulong outIdx = atomic_inc(output + 0xFF);
if(outIdx < 0xFF)
@@ -725,7 +727,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
-__kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads)
+__kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
{
const uint idx = get_global_id(0) - get_global_offset(0);
@@ -785,7 +787,9 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
//output[2] = h7h;
//output[3] = h7l;
- if(as_uint2(h7l).s1 <= Target)
+ // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
+ // and expect an accurate result for target > 32-bit without implementing carries
+ if(h7l <= Target)
{
ulong outIdx = atomic_inc(output + 0xFF);
if(outIdx < 0xFF)
@@ -796,7 +800,7 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
#define SWAP4(x) as_uint(as_uchar4(x).s3210)
-__kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads)
+__kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
{
const uint idx = get_global_id(0) - get_global_offset(0);
@@ -859,7 +863,10 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
for(int i = 0; i < 8; ++i) h[i] = SWAP4(h[i]);
- if(h[7] <= Target)
+ // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
+ // and expect an accurate result for target > 32-bit without implementing carries
+ uint2 t = (uint2)(h[6],h[7]);
+ if( as_ulong(t) <= Target)
{
ulong outIdx = atomic_inc(output + 0xFF);
if(outIdx < 0xFF)
@@ -868,7 +875,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
}
}
-__kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads)
+__kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
{
const uint idx = get_global_id(0) - get_global_offset(0);
@@ -917,7 +924,9 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
for(int i = 0; i < 8; ++i) State[i] ^= tmp[i];
- if(as_uint2(State[7]).s1 <= Target)
+ // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
+ // and expect an accurate result for target > 32-bit without implementing carries
+ if(State[7] <= Target)
{
ulong outIdx = atomic_inc(output + 0xFF);
if(outIdx < 0xFF)
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 9d18860..4cbac6d 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -191,7 +191,7 @@ void minethd::work_main()
size_t round_ctr = 0;
assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
- uint32_t target = oWork.iTarget32;
+ uint64_t target = oWork.iTarget;
XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
if(oWork.bNiceHash)
OpenPOWER on IntegriCloud