diff options
Diffstat (limited to 'xmrstak/backend')
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 4 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.hpp | 2 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 25 | ||||
-rw-r--r-- | xmrstak/backend/amd/minethd.cpp | 2 | ||||
-rw-r--r-- | xmrstak/backend/miner_work.hpp | 6 |
5 files changed, 22 insertions, 17 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 37adc5b..3575854 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -645,7 +645,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_SUCCESS; } -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target) +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target) { cl_int ret; @@ -787,7 +787,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t tar } // Target - if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_uint), &target)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3); return ERR_OCL_API; diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index c2d708d..123de01 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -45,7 +45,7 @@ int getAMDPlatformIdx(); std::vector<GpuContext> getAMDDevices(int index); size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target); size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput); diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index a1642c4..a6a5910 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -665,7 +665,7 @@ R"===( #define VSWAP4(x) ((((x) >> 24) & 0xFFU) | (((x) >> 8) & 0xFF00U) | (((x) << 8) & 0xFF0000U) | (((x) << 24) & 0xFF000000U)) -__kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +__kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads) { const ulong idx = get_global_id(0) - get_global_offset(0); @@ -713,7 +713,9 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u //vstore8(p, 0, output); - if(as_uint16(p).s7 <= Target) + // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values + // and expect an accurate result for target > 32-bit without implementing carries + if(p.s3 <= Target) { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) @@ -725,7 +727,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u #define SWAP8(x) as_ulong(as_uchar8(x).s76543210) -__kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +__kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads) { const uint idx = get_global_id(0) - get_global_offset(0); @@ -785,7 +787,9 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint //output[2] = h7h; //output[3] = h7l; - if(as_uint2(h7l).s1 <= Target) + // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values + // and expect an accurate result for target > 32-bit without implementing carries + if(h7l <= Target) { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) @@ -796,7 +800,7 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint #define SWAP4(x) as_uint(as_uchar4(x).s3210) -__kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +__kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads) { const uint idx = get_global_id(0) - get_global_offset(0); @@ -859,7 +863,10 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u for(int i = 0; i < 8; ++i) h[i] = SWAP4(h[i]); - if(h[7] <= Target) + // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values + // and expect an accurate result for target > 32-bit without implementing carries + uint2 t = (uint2)(h[6],h[7]); + if( as_ulong(t) <= Target) { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) @@ -868,7 +875,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u } } -__kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +__kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads) { const uint idx = get_global_id(0) - get_global_offset(0); @@ -917,7 +924,9 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global for(int i = 0; i < 8; ++i) State[i] ^= tmp[i]; - if(as_uint2(State[7]).s1 <= Target) + // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values + // and expect an accurate result for target > 32-bit without implementing carries + if(State[7] <= Target) { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 9d18860..4cbac6d 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -191,7 +191,7 @@ void minethd::work_main() size_t round_ctr = 0; assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); - uint32_t target = oWork.iTarget32; + uint64_t target = oWork.iTarget; XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target); if(oWork.bNiceHash) diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp index 6b5720c..4bfe429 100644 --- a/xmrstak/backend/miner_work.hpp +++ b/xmrstak/backend/miner_work.hpp @@ -16,8 +16,6 @@ namespace xmrstak uint8_t bWorkBlob[112]; uint32_t iWorkSize; uint64_t iTarget; - // \todo remove workaround needed for amd - uint32_t iTarget32; bool bNiceHash; bool bStall; size_t iPoolId; @@ -41,7 +39,6 @@ namespace xmrstak iWorkSize = from.iWorkSize; iTarget = from.iTarget; - iTarget32 = from.iTarget32; bNiceHash = from.bNiceHash; bStall = from.bStall; iPoolId = from.iPoolId; @@ -53,7 +50,7 @@ namespace xmrstak return *this; } - miner_work(miner_work&& from) : iWorkSize(from.iWorkSize), iTarget(from.iTarget),iTarget32(from.iTarget32), + miner_work(miner_work&& from) : iWorkSize(from.iWorkSize), iTarget(from.iTarget), bStall(from.bStall), iPoolId(from.iPoolId) { assert(iWorkSize <= sizeof(bWorkBlob)); @@ -67,7 +64,6 @@ namespace xmrstak iWorkSize = from.iWorkSize; iTarget = from.iTarget; - iTarget32 = from.iTarget32; bNiceHash = from.bNiceHash; bStall = from.bStall; iPoolId = from.iPoolId; |