diff options
Diffstat (limited to 'xmrstak')
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 41 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 | ||||
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 24 | ||||
-rw-r--r-- | xmrstak/backend/amd/autoAdjust.hpp | 3 | ||||
-rw-r--r-- | xmrstak/backend/amd/config.tpl | 12 | ||||
-rw-r--r-- | xmrstak/backend/amd/jconf.cpp | 10 | ||||
-rw-r--r-- | xmrstak/backend/amd/jconf.hpp | 1 | ||||
-rw-r--r-- | xmrstak/backend/amd/minethd.cpp | 1 |
8 files changed, 74 insertions, 19 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index f8f8a6e..8c4a40d 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -336,8 +336,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[256]; snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk)); + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d -DCOMP_MODE=%d", + hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk), ctx->compMode ? 1 : 0); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) { @@ -553,6 +553,8 @@ int getAMDPlatformIdx() clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); int platformIndex = -1; + // Mesa OpenCL is the fallback if no AMD or Apple OpenCL is found + int mesaPlatform = -1; if(clStatus == CL_SUCCESS) { @@ -563,13 +565,29 @@ int getAMDPlatformIdx() clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos || platformName.find("Apple") != std::string::npos) + if( platformName.find("Advanced Micro Devices") != std::string::npos || + platformName.find("Apple") != std::string::npos || + platformName.find("Mesa") != std::string::npos + ) { - platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); - break; + if(platformName.find("Mesa") != std::string::npos) + mesaPlatform = i; + else + { + // exit if AMD or Apple platform is found + platformIndex = i; + break; + } } } + // fall back to Mesa OpenCL + if(platformIndex == -1 && mesaPlatform != -1) + { + printer::inst()->print_msg(L0,"No AMD platform found select Mesa as OpenCL platform"); + platformIndex = mesaPlatform; + } } else printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); @@ -877,10 +895,15 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) size_t g_intensity = ctx->rawIntensity; size_t w_size = ctx->workSize; - // round up to next multiple of w_size - size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; - // number of global threads must be a multiple of the work group size (w_size) - assert(g_thd%w_size == 0); + size_t g_thd = g_intensity; + + if(ctx->compMode) + { + // round up to next multiple of w_size + size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + // number of global threads must be a multiple of the work group size (w_size) + assert(g_thd%w_size == 0); + } for(int i = 2; i < 6; ++i) { diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index abfad5c..8fb7168 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -26,6 +26,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + int compMode; /*Output vars*/ cl_device_id DeviceID; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 53299ec..dbe8991 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -14,6 +14,11 @@ R"===( * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/* For Mesa clover support */ +#ifdef cl_clang_storage_class_specifiers +# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable +#endif + #ifdef cl_amd_media_ops #pragma OPENCL EXTENSION cl_amd_media_ops : enable #else @@ -451,8 +456,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; @@ -483,9 +490,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_GLOBAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll for(int i = 0; i < 25; ++i) states[i] = State[i]; @@ -499,9 +507,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_LOCAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -536,9 +545,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre barrier(CLK_LOCAL_MEM_FENCE); uint4 b_x; - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -559,8 +569,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre mem_fence(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 8 for(int i = 0; i < ITERATIONS; ++i) @@ -612,8 +624,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -641,8 +655,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -659,8 +675,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_GLOBAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { if(!get_local_id(1)) { diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index b88d3ee..8d60b94 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -143,7 +143,8 @@ private: // set 8 threads per block (this is a good value for the most gpus) conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 4\n" + " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 4,\n" + " \"comp_mode\" : true\n" + " },\n"; } else diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 8914130..84251c7 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -1,9 +1,9 @@ R"===( /* * GPU configuration. You should play around with intensity and worksize as the fastest settings will vary. - * index - GPU index number usually starts from 0 - * intensity - Number of parallel GPU threads (nothing to do with CPU threads) - * worksize - Number of local GPU threads (nothing to do with CPU threads) + * index - GPU index number usually starts from 0 + * intensity - Number of parallel GPU threads (nothing to do with CPU threads) + * worksize - Number of local GPU threads (nothing to do with CPU threads) * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. * strided_index - switch memory pattern used for the scratch pad memory * 2 = chunked memory, chunk size is controlled by 'mem_chunk' @@ -13,9 +13,13 @@ R"===( * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk * this value is only used if 'strided_index' == 2 * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) + * comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows + * to use a intensity which is not the multiple of the worksize. + * If you set false and the intensity is not multiple of the worksize the miner can crash: + * in this case set the intensity to a multiple of the worksize or activate comp_mode. * "gpu_threads_conf" : * [ - * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 4 }, + * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 4, "comp_mode" : true }, * ], * If you do not wish to mine with your AMD GPU(s) then use: * "gpu_threads_conf" : diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index 22381e1..93ba709 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -106,15 +106,17 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk; + const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk, *compMode; idx = GetObjectMember(oThdConf, "index"); intensity = GetObjectMember(oThdConf, "intensity"); w_size = GetObjectMember(oThdConf, "worksize"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); stridedIndex = GetObjectMember(oThdConf, "strided_index"); memChunk = GetObjectMember(oThdConf, "mem_chunk"); + compMode = GetObjectMember(oThdConf, "comp_mode"); - if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || stridedIndex == nullptr || memChunk == nullptr) + if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || memChunk == nullptr || + stridedIndex == nullptr || compMode == nullptr) return false; if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) @@ -148,9 +150,13 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) return false; } + if(!compMode->IsBool()) + return false; + cfg.index = idx->GetUint64(); cfg.w_size = w_size->GetUint64(); cfg.intensity = intensity->GetUint64(); + cfg.compMode = compMode->GetBool(); if(aff->IsNumber()) cfg.cpu_aff = aff->GetInt64(); diff --git a/xmrstak/backend/amd/jconf.hpp b/xmrstak/backend/amd/jconf.hpp index 91e5d0d..580b69f 100644 --- a/xmrstak/backend/amd/jconf.hpp +++ b/xmrstak/backend/amd/jconf.hpp @@ -28,6 +28,7 @@ public: long long cpu_aff; int stridedIndex; int memChunk; + bool compMode; }; size_t GetThreadCount(); diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index ca5e163..8dfbce5 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -98,6 +98,7 @@ bool minethd::init_gpus() vGpuData[i].workSize = cfg.w_size; vGpuData[i].stridedIndex = cfg.stridedIndex; vGpuData[i].memChunk = cfg.memChunk; + vGpuData[i].compMode = cfg.compMode; } return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; |