diff options
Diffstat (limited to 'xmrstak/backend/amd/amd_gpu')
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 17 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 19 |
3 files changed, 28 insertions, 9 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 054ffc4..2f16b67 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -332,8 +332,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[256]; snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk)); + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d -DCOMP_MODE=%d", + hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk), ctx->compMode ? 1 : 0); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) { @@ -873,10 +873,15 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) size_t g_intensity = ctx->rawIntensity; size_t w_size = ctx->workSize; - // round up to next multiple of w_size - size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; - // number of global threads must be a multiple of the work group size (w_size) - assert(g_thd%w_size == 0); + size_t g_thd = g_intensity; + + if(ctx->compMode) + { + // round up to next multiple of w_size + size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + // number of global threads must be a multiple of the work group size (w_size) + assert(g_thd%w_size == 0); + } for(int i = 2; i < 6; ++i) { diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index abfad5c..8fb7168 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -26,6 +26,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + int compMode; /*Output vars*/ cl_device_id DeviceID; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 53299ec..4bac68c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -451,8 +451,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; @@ -483,9 +485,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_GLOBAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll for(int i = 0; i < 25; ++i) states[i] = State[i]; @@ -499,9 +502,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_LOCAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -536,9 +540,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre barrier(CLK_LOCAL_MEM_FENCE); uint4 b_x; - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -559,8 +564,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre mem_fence(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 8 for(int i = 0; i < ITERATIONS; ++i) @@ -612,8 +619,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -641,8 +650,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -659,8 +670,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_GLOBAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { if(!get_local_id(1)) { |