From 9ef2a83be3ddcc36546277f996c9387c38090014 Mon Sep 17 00:00:00 2001 From: Takeshi Suzuki Date: Thu, 5 Apr 2018 20:58:14 -0500 Subject: Fix spelling mistake in gpu.cpp --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 006a7ed..13f018e 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -378,7 +378,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[512]; snprintf(options, sizeof(options), "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d -DMEMORY=%llu -DALGO=%d", - hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0, + hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0, int_port(hashMemSize), int(miner_algo)); /* create a hash for the compile time cache * used data: @@ -497,7 +497,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ } else { - printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled cod from file %s",ctx->deviceIdx, cache_file.c_str()); + printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled code from file %s",ctx->deviceIdx, cache_file.c_str()); std::ostringstream ss; ss << clBinFile.rdbuf(); std::string s = ss.str(); -- cgit v1.1 From 75ae1dd60165141700c36ab699656e9e073e186d Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Thu, 5 Apr 2018 23:19:48 -0600 Subject: Repair all 'namepsace' to 'namespace' (all within comments) --- xmrstak/backend/amd/autoAdjust.hpp | 2 +- xmrstak/backend/backendConnector.cpp | 2 +- xmrstak/backend/backendConnector.hpp | 2 +- xmrstak/backend/cpu/autoAdjust.hpp | 2 +- xmrstak/backend/cpu/autoAdjustHwloc.hpp | 2 +- xmrstak/backend/cpu/jconf.cpp | 2 +- xmrstak/backend/cpu/jconf.hpp | 2 +- xmrstak/backend/cpu/minethd.cpp | 2 +- xmrstak/backend/cpu/minethd.hpp | 2 +- xmrstak/backend/globalStates.cpp | 2 +- xmrstak/backend/globalStates.hpp | 2 +- xmrstak/backend/iBackend.hpp | 2 +- xmrstak/backend/miner_work.hpp | 2 +- xmrstak/backend/nvidia/autoAdjust.hpp | 2 +- xmrstak/backend/nvidia/jconf.hpp | 2 +- xmrstak/backend/nvidia/minethd.hpp | 2 +- xmrstak/backend/plugin.hpp | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index ea057a0..c798cf3 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -155,4 +155,4 @@ private: }; } // namespace amd -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp index d735cb3..acedbd6 100644 --- a/xmrstak/backend/backendConnector.cpp +++ b/xmrstak/backend/backendConnector.cpp @@ -99,4 +99,4 @@ std::vector* BackendConnector::thread_starter(miner_work& pWork) return pvThreads; } -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/backendConnector.hpp b/xmrstak/backend/backendConnector.hpp index da3dc77..66d873e 100644 --- a/xmrstak/backend/backendConnector.hpp +++ b/xmrstak/backend/backendConnector.hpp @@ -18,4 +18,4 @@ namespace xmrstak static bool self_test(); }; -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index 969d478..abba8b6 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -172,4 +172,4 @@ private: }; } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index 568abb5..68d2b3f 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -214,4 +214,4 @@ private: }; } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/jconf.cpp b/xmrstak/backend/cpu/jconf.cpp index 399dd16..49da7ae 100644 --- a/xmrstak/backend/cpu/jconf.cpp +++ b/xmrstak/backend/cpu/jconf.cpp @@ -259,4 +259,4 @@ bool jconf::parse_config(const char* sFilename) } } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/jconf.hpp b/xmrstak/backend/cpu/jconf.hpp index e98ed16..be85503 100644 --- a/xmrstak/backend/cpu/jconf.hpp +++ b/xmrstak/backend/cpu/jconf.hpp @@ -40,4 +40,4 @@ private: }; } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 5b56f85..ec3e145 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -769,4 +769,4 @@ void minethd::multiway_work_main() } } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index cd78343..59583b6 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -65,4 +65,4 @@ private: }; } // namespace cpu -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp index 78823c5..1ec7983 100644 --- a/xmrstak/backend/globalStates.cpp +++ b/xmrstak/backend/globalStates.cpp @@ -53,4 +53,4 @@ void globalStates::switch_work(miner_work& pWork, pool_data& dat) iGlobalJobNo++; } -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp index 442be69..fafd232 100644 --- a/xmrstak/backend/globalStates.hpp +++ b/xmrstak/backend/globalStates.hpp @@ -55,4 +55,4 @@ private: } }; -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp index 3d2115a..fdc647e 100644 --- a/xmrstak/backend/iBackend.hpp +++ b/xmrstak/backend/iBackend.hpp @@ -47,4 +47,4 @@ namespace xmrstak } }; -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp index 9e5a4e4..438ec0d 100644 --- a/xmrstak/backend/miner_work.hpp +++ b/xmrstak/backend/miner_work.hpp @@ -81,4 +81,4 @@ namespace xmrstak } }; -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp index be7d1ce..0b1da87 100644 --- a/xmrstak/backend/nvidia/autoAdjust.hpp +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -109,4 +109,4 @@ private: }; } // namespace nvidia -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp index 7f60f1d..b4ebaa0 100644 --- a/xmrstak/backend/nvidia/jconf.hpp +++ b/xmrstak/backend/nvidia/jconf.hpp @@ -49,4 +49,4 @@ private: }; } // namespace nvidia -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp index fcd24fa..89c2944 100644 --- a/xmrstak/backend/nvidia/minethd.hpp +++ b/xmrstak/backend/nvidia/minethd.hpp @@ -60,4 +60,4 @@ private: }; } // namespace nvidia -} // namepsace xmrstak +} // namespace xmrstak diff --git a/xmrstak/backend/plugin.hpp b/xmrstak/backend/plugin.hpp index 89cdf97..2610db8 100644 --- a/xmrstak/backend/plugin.hpp +++ b/xmrstak/backend/plugin.hpp @@ -109,4 +109,4 @@ struct plugin * */ }; -} // namepsace xmrstak +} // namespace xmrstak -- cgit v1.1 From 5fd4520afa4dd824676f9e24e3f3c771687a6a39 Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Thu, 5 Apr 2018 23:30:19 -0600 Subject: Repair more typos in comments only --- xmrstak/backend/cpu/crypto/skein_port.h | 2 +- xmrstak/backend/nvidia/autoAdjust.hpp | 2 +- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/cpu/crypto/skein_port.h b/xmrstak/backend/cpu/crypto/skein_port.h index 9cbefcb..99641bc 100644 --- a/xmrstak/backend/cpu/crypto/skein_port.h +++ b/xmrstak/backend/cpu/crypto/skein_port.h @@ -49,7 +49,7 @@ multiple of size / 8) ptr_cast(x,size) casts a pointer to a pointer to a - varaiable of length 'size' bits + variable of length 'size' bits */ #define ui_type(size) uint##size##_t diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp index 0b1da87..d8bb621 100644 --- a/xmrstak/backend/nvidia/autoAdjust.hpp +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -50,7 +50,7 @@ public: ctx.device_blocks = -1; ctx.device_threads = -1; - // set all evice option those marked as auto (-1) to a valid value + // set all device option those marked as auto (-1) to a valid value #ifndef _WIN32 ctx.device_bfactor = 0; ctx.device_bsleep = 0; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 02c157e..ead93c5 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -458,7 +458,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) #undef XMRSTAK_PP_TOSTRING1 std::stringstream ss(archStringList); - //transform string list sperated with `+` into a vector of integers + //transform string list separated with `+` into a vector of integers int tmpArch; while ( ss >> tmpArch ) arch.push_back( tmpArch ); @@ -492,7 +492,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) } } - // set all evice option those marked as auto (-1) to a valid value + // set all device option those marked as auto (-1) to a valid value if(ctx->device_blocks == -1) { /* good values based of my experience -- cgit v1.1 From a57976439b1322eecb5e29198f3d9676d6f06909 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 7 Apr 2018 22:53:43 +0200 Subject: amd simplify kernel for different algorithms - remove version numbers within the kernel - create seperate program context for each mining algorithm - remove kernel `cn1_monero` is now integrated in `cn1` - remname `cnX` kernel in `cnX + algorithmNumber` --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 413 +++++++++++----------- xmrstak/backend/amd/amd_gpu/gpu.hpp | 8 +- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 301 +++++----------- xmrstak/backend/amd/minethd.cpp | 4 +- 4 files changed, 295 insertions(+), 431 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 13f018e..b9cc9b6 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -307,12 +308,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t scratchPadSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); int threadMemMask = cn_select_mask(::jconf::inst()->GetMiningAlgo()); int hashIterations = cn_select_iter(::jconf::inst()->GetMiningAlgo()); size_t g_thd = ctx->rawIntensity; - ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, hashMemSize * g_thd, NULL, &ret); + ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, scratchPadSize * g_thd, NULL, &ret); if(ret != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret)); @@ -373,167 +374,202 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - auto miner_algo = ::jconf::inst()->GetMiningAlgo(); + xmrstak_algo miner_algo[2] = { + ::jconf::inst()->GetMiningAlgo(), + ::jconf::inst()->GetMiningAlgoRoot() + }; + int num_algos = miner_algo[0] == miner_algo[1] ? 1 : 2; - char options[512]; - snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d -DMEMORY=%llu -DALGO=%d", - hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0, - int_port(hashMemSize), int(miner_algo)); - /* create a hash for the compile time cache - * used data: - * - source code - * - device name - * - compile paramater - */ - std::string src_str(source_code); - src_str += options; - src_str += devNameVec.data(); - std::string hash_hex_str; - picosha2::hash256_hex_string(src_str, hash_hex_str); - - std::string cache_file = get_home() + "/.openclcache/" + hash_hex_str + ".openclbin"; - std::ifstream clBinFile(cache_file, std::ofstream::in | std::ofstream::binary); - if(xmrstak::params::inst().AMDCache == false || !clBinFile.good()) + for(int ii = 0; ii < num_algos; ++ii) { - if(xmrstak::params::inst().AMDCache) - printer::inst()->print_msg(L1,"OpenCL device %u - Precompiled code %s not found. Compiling ...",ctx->deviceIdx, cache_file.c_str()); - ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); - if(ret != CL_SUCCESS) - { - printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret)); - return ERR_OCL_API; - } + size_t hashMemSize = cn_select_memory(miner_algo[ii]); + int threadMemMask = cn_select_mask(miner_algo[ii]); + int hashIterations = cn_select_iter(miner_algo[ii]); - ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); - if(ret != CL_SUCCESS) + char options[512]; + snprintf(options, sizeof(options), + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d -DMEMORY=%llu -DALGO=%d", + hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0, + int_port(hashMemSize), int(miner_algo[ii])); + /* create a hash for the compile time cache + * used data: + * - source code + * - device name + * - compile paramater + */ + std::string src_str(source_code); + src_str += options; + src_str += devNameVec.data(); + std::string hash_hex_str; + picosha2::hash256_hex_string(src_str, hash_hex_str); + + std::string cache_file = get_home() + "/.openclcache/" + hash_hex_str + ".openclbin"; + std::ifstream clBinFile(cache_file, std::ofstream::in | std::ofstream::binary); + if(xmrstak::params::inst().AMDCache == false || !clBinFile.good()) { - size_t len; - printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); - - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + if(xmrstak::params::inst().AMDCache) + printer::inst()->print_msg(L1,"OpenCL device %u - Precompiled code %s not found. Compiling ...",ctx->deviceIdx, cache_file.c_str()); + ctx->Program[ii] = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret)); return ERR_OCL_API; } - char* BuildLog = (char*)malloc(len + 1); - BuildLog[0] = '\0'; - - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + ret = clBuildProgram(ctx->Program[ii], 1, &ctx->DeviceID, options, NULL, NULL); + if(ret != CL_SUCCESS) { + size_t len; + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + + if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + return ERR_OCL_API; + } + + char* BuildLog = (char*)malloc(len + 1); + BuildLog[0] = '\0'; + + if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + { + free(BuildLog); + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_str("Build log:\n"); + std::cerr<print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); return ERR_OCL_API; } - printer::inst()->print_str("Build log:\n"); - std::cerr<Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL); + cl_uint num_devices; + clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL); - std::vector devices_ids(num_devices); - clGetProgramInfo(ctx->Program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL); - int dev_id = 0; - /* Search for the gpu within the program context. - * The id can be different to ctx->DeviceID. - */ - for(auto & ocl_device : devices_ids) - { - if(ocl_device == ctx->DeviceID) - break; - dev_id++; - } + std::vector devices_ids(num_devices); + clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL); + int dev_id = 0; + /* Search for the gpu within the program context. + * The id can be different to ctx->DeviceID. + */ + for(auto & ocl_device : devices_ids) + { + if(ocl_device == ctx->DeviceID) + break; + dev_id++; + } - cl_build_status status; - do - { - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + cl_build_status status; + do { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); - return ERR_OCL_API; + if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + return ERR_OCL_API; + } + port_sleep(1); } - port_sleep(1); - } - while(status == CL_BUILD_IN_PROGRESS); + while(status == CL_BUILD_IN_PROGRESS); + + std::vector binary_sizes(num_devices); + clGetProgramInfo (ctx->Program[ii], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL); - std::vector binary_sizes(num_devices); - clGetProgramInfo (ctx->Program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL); + std::vector all_programs(num_devices); + std::vector> program_storage; - std::vector all_programs(num_devices); - std::vector> program_storage; + if(xmrstak::params::inst().AMDCache) + { + int p_id = 0; + size_t mem_size = 0; + // create memory structure to query all OpenCL program binaries + for(auto & p : all_programs) + { + program_storage.emplace_back(std::vector(binary_sizes[p_id])); + all_programs[p_id] = program_storage[p_id].data(); + mem_size += binary_sizes[p_id]; + p_id++; + } - if(xmrstak::params::inst().AMDCache) + if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret)); + return ERR_OCL_API; + } + + std::ofstream file_stream; + file_stream.open(cache_file, std::ofstream::out | std::ofstream::binary); + file_stream.write(all_programs[dev_id], binary_sizes[dev_id]); + file_stream.close(); + printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code stored in file %s",ctx->deviceIdx, cache_file.c_str()); + } + } + else { - int p_id = 0; - size_t mem_size = 0; - // create memory structure to query all OpenCL program binaries - for(auto & p : all_programs) + printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled code from file %s",ctx->deviceIdx, cache_file.c_str()); + std::ostringstream ss; + ss << clBinFile.rdbuf(); + std::string s = ss.str(); + + size_t bin_size = s.size(); + auto data_ptr = s.data(); + + cl_int clStatus; + ctx->Program[ii] = clCreateProgramWithBinary( + opencl_ctx, 1, &ctx->DeviceID, &bin_size, + (const unsigned char **)&data_ptr, &clStatus, &ret + ); + if(ret != CL_SUCCESS) { - program_storage.emplace_back(std::vector(binary_sizes[p_id])); - all_programs[p_id] = program_storage[p_id].data(); - mem_size += binary_sizes[p_id]; - p_id++; + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str()); + return ERR_OCL_API; } - - if((ret = clGetProgramInfo(ctx->Program, CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS) + ret = clBuildProgram(ctx->Program[ii], 1, &ctx->DeviceID, NULL, NULL, NULL); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret)); + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. Try to delete file %s", err_to_str(ret), cache_file.c_str()); return ERR_OCL_API; } - - std::ofstream file_stream; - file_stream.open(cache_file, std::ofstream::out | std::ofstream::binary); - file_stream.write(all_programs[dev_id], binary_sizes[dev_id]); - file_stream.close(); - printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code stored in file %s",ctx->deviceIdx, cache_file.c_str()); } - } - else - { - printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled code from file %s",ctx->deviceIdx, cache_file.c_str()); - std::ostringstream ss; - ss << clBinFile.rdbuf(); - std::string s = ss.str(); - - size_t bin_size = s.size(); - auto data_ptr = s.data(); - - cl_int clStatus; - ctx->Program = clCreateProgramWithBinary( - opencl_ctx, 1, &ctx->DeviceID, &bin_size, - (const unsigned char **)&data_ptr, &clStatus, &ret - ); - if(ret != CL_SUCCESS) + + std::vector KernelNames = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; + // append algorithm number to kernel name + for(int k = 0; k < 3; k++) + KernelNames[k] += std::to_string(miner_algo[ii]); + + if(ii == 0) { - printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str()); - return ERR_OCL_API; + for(int i = 0; i < 7; ++i) + { + ctx->Kernels[ii][i] = clCreateKernel(ctx->Program[ii], KernelNames[i].c_str(), &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_0 %s.", err_to_str(ret), KernelNames[i].c_str()); + return ERR_OCL_API; + } + } } - ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, NULL, NULL, NULL); - if(ret != CL_SUCCESS) + else { - printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. Try to delete file %s", err_to_str(ret), cache_file.c_str()); - return ERR_OCL_API; - } - } + for(int i = 0; i < 3; ++i) + { + ctx->Kernels[ii][i] = clCreateKernel(ctx->Program[ii], KernelNames[i].c_str(), &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_1 %s.", err_to_str(ret), KernelNames[i].c_str()); + return ERR_OCL_API; + } + } + // move kernel from the main algorithm into the root algorithm kernel space + for(int i = 3; i < 7; ++i) + { + ctx->Kernels[ii][i] = ctx->Kernels[0][i]; + } - const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein", "cn1_monero" }; - for(int i = 0; i < 8; ++i) - { - ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret); - if(ret != CL_SUCCESS) - { - printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel %s.", err_to_str(ret), KernelNames[i]); - return ERR_OCL_API; } } - ctx->Nonce = 0; return 0; } @@ -881,8 +917,11 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_SUCCESS; } -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version) +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo) { + // switch to the kernel storage + int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1; + cl_int ret; if(input_len > 84) @@ -899,71 +938,51 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return ERR_OCL_API; } - if((ret = clSetKernelArg(ctx->Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret)); return ERR_OCL_API; } // Scratchpads - if((ret = clSetKernelArg(ctx->Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret)); return ERR_OCL_API; } // States - if((ret = clSetKernelArg(ctx->Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret)); return ERR_OCL_API; } // Threads - if((ret = clSetKernelArg(ctx->Kernels[0], 3, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret)); return(ERR_OCL_API); } - /* ATTENTION: if we miner cryptonight_heavy the kernel needs an additional parameter version. - * Do NOT use the variable `miner_algo` because this variable is changed dynamicly - */ - if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy) - { - // version - if ((ret = clSetKernelArg(ctx->Kernels[0], 4, sizeof(cl_uint), &version)) != CL_SUCCESS) - { - printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 4.", err_to_str(ret)); - return ERR_OCL_API; - } - } - // CN1 Kernel - /// @todo only activate if currency is monero - int cn_kernel_offset = 0; - if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon) - { - cn_kernel_offset = 6; - } - // Scratchpads - if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret)); return ERR_OCL_API; } // States - if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret)); return ERR_OCL_API; } // Threads - if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret)); return(ERR_OCL_API); @@ -972,113 +991,88 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon ) { // Input - if ((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) + if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) { printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, arugment 4(input buffer).", err_to_str(ret)); return ERR_OCL_API; } } - /* ATTENTION: if we miner cryptonight_heavy the kernel needs an additional parameter version. - * Do NOT use the variable `miner_algo` because this variable is changed dynamicly - */ - else if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy) - { - // version - if ((ret = clSetKernelArg(ctx->Kernels[1], 3, sizeof(cl_uint), &version)) != CL_SUCCESS) - { - printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 3 (version).", err_to_str(ret)); - return ERR_OCL_API; - } - } // CN3 Kernel // Scratchpads - if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret)); return ERR_OCL_API; } // States - if((ret = clSetKernelArg(ctx->Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret)); return ERR_OCL_API; } // Branch 0 - if((ret = clSetKernelArg(ctx->Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret)); return ERR_OCL_API; } // Branch 1 - if((ret = clSetKernelArg(ctx->Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret)); return ERR_OCL_API; } // Branch 2 - if((ret = clSetKernelArg(ctx->Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret)); return ERR_OCL_API; } // Branch 3 - if((ret = clSetKernelArg(ctx->Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret)); return ERR_OCL_API; } // Threads - if((ret = clSetKernelArg(ctx->Kernels[2], 6, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret)); return(ERR_OCL_API); } - /* ATTENTION: if we miner cryptonight_heavy the kernel needs an additional parameter version. - * Do NOT use the variable `miner_algo` because this variable is changed dynamicly - */ - if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy) - { - // version - if ((ret = clSetKernelArg(ctx->Kernels[2], 7, sizeof(cl_uint), &version)) != CL_SUCCESS) - { - printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 7.", err_to_str(ret)); - return ERR_OCL_API; - } - } - for(int i = 0; i < 4; ++i) { // States - if((ret = clSetKernelArg(ctx->Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0); return ERR_OCL_API; } // Nonce buffer - if((ret = clSetKernelArg(ctx->Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1); return ERR_OCL_API; } // Output - if((ret = clSetKernelArg(ctx->Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2); return ERR_OCL_API; } // Target - if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3); return ERR_OCL_API; @@ -1088,8 +1082,11 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return ERR_SUCCESS; } -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version) +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) { + // switch to the kernel storage + int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1; + cl_int ret; cl_uint zero = 0; size_t BranchNonces[4]; @@ -1125,35 +1122,21 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, clFinish(ctx->CommandQueues); size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 }; - if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0); return ERR_OCL_API; } - /*for(int i = 1; i < 3; ++i) - { - if((ret = clEnqueueNDRangeKernel(*ctx->CommandQueues, ctx->Kernels[i], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) - { - Log(LOG_CRITICAL, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i); - return(ERR_OCL_API); - } - }*/ - size_t tmpNonce = ctx->Nonce; - /// @todo only activate if currency is monero - int cn_kernel_offset = 0; - if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon) - { - cn_kernel_offset = 6; - } - if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1 + cn_kernel_offset], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1); return ERR_OCL_API; } - if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2); return ERR_OCL_API; @@ -1190,7 +1173,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, if(BranchNonces[i]) { // Threads - if((clSetKernelArg(ctx->Kernels[i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS) + if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4); return(ERR_OCL_API); @@ -1201,7 +1184,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, // number of global threads must be a multiple of the work group size (w_size) assert(BranchNonces[i]%w_size == 0); size_t tmpNonce = ctx->Nonce; - if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3); return ERR_OCL_API; diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index a387b15..0db6c90 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -35,8 +35,8 @@ struct GpuContext cl_mem InputBuffer; cl_mem OutputBuffer; cl_mem ExtraBuffers[6]; - cl_program Program; - cl_kernel Kernels[8]; + cl_program Program[2]; + cl_kernel Kernels[2][8]; size_t freeMem; int computeUnits; std::string name; @@ -50,7 +50,7 @@ int getAMDPlatformIdx(); std::vector getAMDDevices(int index); size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version); -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo); +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo); diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 5d4e66c..d2ae1a7 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -433,15 +433,13 @@ inline ulong getIdx() #endif } -#define mix_and_propagate(xin) (xin)[(get_local_id(1)) % 8][get_local_id(0)] ^ (xin)[(get_local_id(1) + 1) % 8][get_local_id(0)] +#define mix_and_propagate(xin) (xin)[(get_local_id(1)) % 8][get_local_id(0)] ^ (xin)[(get_local_id(1) + 1) % 8][get_local_id(0)] + +#define JOIN_DO(x,y) x##y +#define JOIN(x,y) JOIN_DO(x,y) __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) -__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads -// cryptonight_heavy -#if (ALGO == 4) - , uint version -#endif -) +__kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads) { ulong State[25]; uint ExpandedKey1[40]; @@ -517,23 +515,20 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul // cryptonight_heavy #if (ALGO == 4) - if(version >= 3) - { - __local uint4 xin[8][WORKSIZE]; + __local uint4 xin[8][WORKSIZE]; - /* Also left over threads performe this loop. - * The left over thread results will be ignored - */ - for(size_t i=0; i < 16; i++) - { - #pragma unroll - for(int j = 0; j < 10; ++j) - text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey1)[j]); - barrier(CLK_LOCAL_MEM_FENCE); - xin[get_local_id(1)][get_local_id(0)] = text; - barrier(CLK_LOCAL_MEM_FENCE); - text = mix_and_propagate(xin); - } + /* Also left over threads performe this loop. + * The left over thread results will be ignored + */ + for(size_t i=0; i < 16; i++) + { + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey1)[j]); + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); } #endif @@ -542,13 +537,9 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul if(gIdx < Threads) #endif { - int iterations = MEMORY >> 7; -#if (ALGO == 4) - if(version < 3) - iterations >>= 1; -#endif + #pragma unroll 2 - for(int i = 0; i < iterations; ++i) + for(int i = 0; i < (MEMORY >> 7); ++i) { #pragma unroll for(int j = 0; j < 10; ++j) @@ -560,22 +551,13 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul mem_fence(CLK_GLOBAL_MEM_FENCE); } -#define VARIANT1_1(p) \ - uint table = 0x75310U; \ - uint index = (((p).s2 >> 26) & 12) | (((p).s2 >> 23) & 2); \ - (p).s2 ^= ((table >> index) & 0x30U) << 24 - -#define VARIANT1_2(p) ((uint2 *)&(p))[0] ^= tweak1_2 - -#define VARIANT1_INIT() \ - tweak1_2 = as_uint2(input[4]); \ - tweak1_2.s0 >>= 24; \ - tweak1_2.s0 |= tweak1_2.s1 << 8; \ - tweak1_2.s1 = get_global_id(0); \ - tweak1_2 ^= as_uint2(states[24]) - __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulong Threads, __global ulong *input) +__kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states, ulong Threads +// cryptonight_monero || cryptonight_aeon +#if(ALGO == 3 || ALGO == 5) +, __global ulong *input +#endif +) { ulong a[2], b[2]; __local uint AES0[256], AES1[256], AES2[256], AES3[256]; @@ -592,8 +574,9 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulo } barrier(CLK_LOCAL_MEM_FENCE); - +#if(ALGO == 3 || ALGO == 5) uint2 tweak1_2; +#endif uint4 b_x; #if(COMP_MODE==1) // do not use early return here @@ -615,7 +598,13 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulo b[1] = states[3] ^ states[7]; b_x = ((uint4 *)b)[0]; - VARIANT1_INIT(); +#if(ALGO == 3 || ALGO == 5) + tweak1_2 = as_uint2(input[4]); + tweak1_2.s0 >>= 24; + tweak1_2.s0 |= tweak1_2.s1 << 8; + tweak1_2.s1 = get_global_id(0); + tweak1_2 ^= as_uint2(states[24]); +#endif } mem_fence(CLK_LOCAL_MEM_FENCE); @@ -625,17 +614,23 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulo if(gIdx < Threads) #endif { + ulong idx0 = a[0]; + #pragma unroll 8 for(int i = 0; i < ITERATIONS; ++i) { ulong c[2]; - ((uint4 *)c)[0] = Scratchpad[IDX((a[0] & MASK) >> 4)]; + ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)]; ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); b_x ^= ((uint4 *)c)[0]; - VARIANT1_1(b_x); - Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x; +#if(ALGO == 3 || ALGO == 5) + uint table = 0x75310U; + uint index = ((b_x.s2 >> 26) & 12) | ((b_x.s2 >> 23) & 2); + b_x.s2 ^= ((table >> index) & 0x30U) << 24; +#endif + Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x; uint4 tmp; tmp = Scratchpad[IDX((c[0] & MASK) >> 4)]; @@ -643,101 +638,14 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulo a[1] += c[0] * as_ulong2(tmp).s0; a[0] += mul_hi(c[0], as_ulong2(tmp).s0); - VARIANT1_2(a[1]); - Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; - VARIANT1_2(a[1]); - - ((uint4 *)a)[0] ^= tmp; - - b_x = ((uint4 *)c)[0]; - } - } - mem_fence(CLK_GLOBAL_MEM_FENCE); -} - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Threads -// cryptonight_heavy -#if (ALGO == 4) - , uint version -#endif -) -{ - ulong a[2], b[2]; - __local uint AES0[256], AES1[256], AES2[256], AES3[256]; - - const ulong gIdx = getIdx(); - - for(int i = get_local_id(0); i < 256; i += WORKSIZE) - { - const uint tmp = AES0_C[i]; - AES0[i] = tmp; - AES1[i] = rotate(tmp, 8U); - AES2[i] = rotate(tmp, 16U); - AES3[i] = rotate(tmp, 24U); - } - - barrier(CLK_LOCAL_MEM_FENCE); - - uint4 b_x; -#if(COMP_MODE==1) - // do not use early return here - if(gIdx < Threads) -#endif - { - states += 25 * gIdx; -#if(STRIDED_INDEX==0) - Scratchpad += gIdx * (MEMORY >> 4); -#elif(STRIDED_INDEX==1) - Scratchpad += gIdx; -#elif(STRIDED_INDEX==2) - Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); -#endif - - a[0] = states[0] ^ states[4]; - b[0] = states[2] ^ states[6]; - a[1] = states[1] ^ states[5]; - b[1] = states[3] ^ states[7]; - - b_x = ((uint4 *)b)[0]; - } - - mem_fence(CLK_LOCAL_MEM_FENCE); - -#if(COMP_MODE==1) - // do not use early return here - if(gIdx < Threads) -#endif - { - ulong idx0 = a[0]; - ulong mask = MASK; - int iterations = ITERATIONS; -#if (ALGO == 4) - if(version < 3) - { - iterations <<= 1; - mask -= 0x200000; - } +#if(ALGO == 3 || ALGO == 5) + ((uint2 *)&(a[1]))[0] ^= tweak1_2; + Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; + ((uint2 *)&(a[1]))[0] ^= tweak1_2; +#else + Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; #endif - #pragma unroll 8 - for(int i = 0; i < iterations; ++i) - { - ulong c[2]; - - ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & mask) >> 4)]; - ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); - //b_x ^= ((uint4 *)c)[0]; - - Scratchpad[IDX((idx0 & mask) >> 4)] = b_x ^ ((uint4 *)c)[0]; - - uint4 tmp; - tmp = Scratchpad[IDX((c[0] & mask) >> 4)]; - - a[1] += c[0] * as_ulong2(tmp).s0; - a[0] += mul_hi(c[0], as_ulong2(tmp).s0); - - Scratchpad[IDX((c[0] & mask) >> 4)] = ((uint4 *)a)[0]; ((uint4 *)a)[0] ^= tmp; idx0 = a[0]; @@ -745,14 +653,11 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre b_x = ((uint4 *)c)[0]; // cryptonight_heavy #if (ALGO == 4) - if(version >= 3) - { - long n = *((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4)))); - int d = ((__global int*)(Scratchpad + (IDX((idx0 & mask) >> 4))))[2]; - long q = n / (d | 0x5); - *((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4)))) = n ^ q; - idx0 = d ^ q; - } + long n = *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))); + int d = ((__global int*)(Scratchpad + (IDX((idx0 & MASK) >> 4))))[2]; + long q = n / (d | 0x5); + *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n ^ q; + idx0 = d ^ q; #endif } } @@ -760,12 +665,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre } __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) -__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads -// cryptonight_heavy -#if (ALGO == 4) - , uint version -#endif - ) +__kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads) { __local uint AES0[256], AES1[256], AES2[256], AES3[256]; uint ExpandedKey2[40]; @@ -827,58 +727,42 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u if(gIdx < Threads) #endif { - int iterations = MEMORY >> 7; #if (ALGO == 4) - if(version < 3) - { - iterations >>= 1; - #pragma unroll 2 - for(int i = 0; i < iterations; ++i) - { - text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; - - #pragma unroll 10 - for(int j = 0; j < 10; ++j) - text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); - } - } - else + #pragma unroll 2 + for(int i = 0; i < (MEMORY >> 7); ++i) { - #pragma unroll 2 - for(int i = 0; i < iterations; ++i) - { - text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; - #pragma unroll 10 - for(int j = 0; j < 10; ++j) - text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + #pragma unroll 10 + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); - barrier(CLK_LOCAL_MEM_FENCE); - xin[get_local_id(1)][get_local_id(0)] = text; - barrier(CLK_LOCAL_MEM_FENCE); - text = mix_and_propagate(xin); - } + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); + } - #pragma unroll 2 - for(int i = 0; i < iterations; ++i) - { - text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + #pragma unroll 2 + for(int i = 0; i < (MEMORY >> 7); ++i) + { + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; - #pragma unroll 10 - for(int j = 0; j < 10; ++j) - text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + #pragma unroll 10 + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); - barrier(CLK_LOCAL_MEM_FENCE); - xin[get_local_id(1)][get_local_id(0)] = text; - barrier(CLK_LOCAL_MEM_FENCE); - text = mix_and_propagate(xin); - } + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); } + #else #pragma unroll 2 - for(int i = 0; i < iterations; ++i) + for(int i = 0; i < (MEMORY >> 7); ++i) { text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; @@ -891,21 +775,18 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u // cryptonight_heavy #if (ALGO == 4) - if(version >= 3) + /* Also left over threads perform this loop. + * The left over thread results will be ignored + */ + for(size_t i=0; i < 16; i++) { - /* Also left over threads performe this loop. - * The left over thread results will be ignored - */ - for(size_t i=0; i < 16; i++) - { - #pragma unroll - for(int j = 0; j < 10; ++j) - text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); - barrier(CLK_LOCAL_MEM_FENCE); - xin[get_local_id(1)][get_local_id(0)] = text; - barrier(CLK_LOCAL_MEM_FENCE); - text = mix_and_propagate(xin); - } + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); } #endif diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index cab5ad9..f15b480 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -233,7 +233,7 @@ void minethd::work_main() assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); uint64_t target = oWork.iTarget; - XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo, version); + XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo); if(oWork.bNiceHash) pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39); @@ -249,7 +249,7 @@ void minethd::work_main() cl_uint results[0x100]; memset(results,0,sizeof(cl_uint)*(0x100)); - XMRRunJob(pGpuCtx, results, miner_algo, version); + XMRRunJob(pGpuCtx, results, miner_algo); for(size_t i = 0; i < results[0xFF]; i++) { -- cgit v1.1 From 1b18f598aa1190a0e6126ed2c70e052e9403d180 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 8 Apr 2018 20:40:54 +0200 Subject: refactor scratchpad creation Use the maximum scratchpad size from before and after the fork. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 10 ++++++---- xmrstak/backend/amd/autoAdjust.hpp | 5 ++++- xmrstak/backend/cpu/autoAdjust.hpp | 5 ++++- xmrstak/backend/cpu/autoAdjustHwloc.hpp | 5 ++++- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 11 +++++++++-- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 10 ++++++++-- 6 files changed, 35 insertions(+), 11 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index b9cc9b6..79e80bd 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -308,9 +308,10 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - size_t scratchPadSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); - int threadMemMask = cn_select_mask(::jconf::inst()->GetMiningAlgo()); - int hashIterations = cn_select_iter(::jconf::inst()->GetMiningAlgo()); + size_t scratchPadSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); size_t g_thd = ctx->rawIntensity; ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, scratchPadSize * g_thd, NULL, &ret); @@ -382,6 +383,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ for(int ii = 0; ii < num_algos; ++ii) { + // scratchpad size for the selected mining algorithm size_t hashMemSize = cn_select_memory(miner_algo[ii]); int threadMemMask = cn_select_mask(miner_algo[ii]); int hashIterations = cn_select_iter(miner_algo[ii]); @@ -493,7 +495,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ p_id++; } - if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS) + if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret)); return ERR_OCL_API; diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index c798cf3..6df0eea 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -83,7 +83,10 @@ private: constexpr size_t byteToMiB = 1024u * 1024u; - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); std::string conf; for(auto& ctx : devVec) diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index abba8b6..ed96d8b 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -36,7 +36,10 @@ public: bool printConfig() { - const size_t hashMemSizeKB = cn_select_memory(::jconf::inst()->GetMiningAlgo()) / 1024u; + const size_t hashMemSizeKB = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ) / 1024u; const size_t halfHashMemSizeKB = hashMemSizeKB / 2u; configEditor configTpl{}; diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index 68d2b3f..f110ee3 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -28,7 +28,10 @@ public: autoAdjust() { - hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); halfHashMemSize = hashMemSize / 2u; } diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 17fa24b..ac696dd 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -35,6 +35,7 @@ extern "C" #include "xmrstak/jconf.hpp" #include #include +#include #ifdef __GNUC__ #include @@ -202,7 +203,10 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) { - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096); @@ -278,7 +282,10 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al void cryptonight_free_ctx(cryptonight_ctx* ctx) { - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); if(ctx->ctx_info[0] != 0) { diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index ead93c5..f016ef4 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -279,7 +279,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) if(gpuArch < 70) CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); @@ -576,7 +579,10 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) ctx->total_device_memory = totalMemory; ctx->free_device_memory = freeMemory; - size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + size_t hashMemSize = std::max( + cn_select_memory(::jconf::inst()->GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + ); #ifdef WIN32 /* We use in windows bfactor (split slow kernel into smaller parts) to avoid -- cgit v1.1 From 8a2e5049f6dee227eb8b4fa8d747863ac18b9e58 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 14 Apr 2018 21:29:57 +0200 Subject: change 64Bit size value to 32Bit avoid conversion warning by reducing the size value type --- xmrstak/backend/cpu/crypto/c_blake256.c | 16 ++++++++-------- xmrstak/backend/cpu/crypto/c_blake256.h | 16 ++++++++-------- xmrstak/backend/cpu/crypto/c_skein.h | 2 +- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 2 +- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 10 +++++----- xmrstak/backend/cpu/crypto/hash.h | 4 +++- 6 files changed, 26 insertions(+), 24 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/cpu/crypto/c_blake256.c b/xmrstak/backend/cpu/crypto/c_blake256.c index ff623dd..e5fadfe 100644 --- a/xmrstak/backend/cpu/crypto/c_blake256.c +++ b/xmrstak/backend/cpu/crypto/c_blake256.c @@ -124,7 +124,7 @@ void blake224_init(state *S) { } // datalen = number of bits -void blake256_update(state *S, const uint8_t *data, uint64_t datalen) { +void blake256_update(state *S, const uint8_t *data, uint32_t datalen) { int left = S->buflen >> 3; int fill = 64 - left; @@ -155,7 +155,7 @@ void blake256_update(state *S, const uint8_t *data, uint64_t datalen) { } // datalen = number of bits -void blake224_update(state *S, const uint8_t *data, uint64_t datalen) { +void blake224_update(state *S, const uint8_t *data, uint32_t datalen) { blake256_update(S, data, datalen); } @@ -206,7 +206,7 @@ void blake224_final(state *S, uint8_t *digest) { } // inlen = number of bytes -void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { +void blake256_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) { state S; blake256_init(&S); blake256_update(&S, in, inlen * 8); @@ -214,7 +214,7 @@ void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { } // inlen = number of bytes -void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { +void blake224_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) { state S; blake224_init(&S); blake224_update(&S, in, inlen * 8); @@ -282,13 +282,13 @@ void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { } // datalen = number of bits -void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { +void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint32_t datalen) { // update the inner state blake256_update(&S->inner, data, datalen); } // datalen = number of bits -void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { +void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint32_t datalen) { // update the inner state blake224_update(&S->inner, data, datalen); } @@ -310,7 +310,7 @@ void hmac_blake224_final(hmac_state *S, uint8_t *digest) { } // keylen = number of bytes; inlen = number of bytes -void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { +void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) { hmac_state S; hmac_blake256_init(&S, key, keylen); hmac_blake256_update(&S, in, inlen * 8); @@ -318,7 +318,7 @@ void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const } // keylen = number of bytes; inlen = number of bytes -void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { +void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) { hmac_state S; hmac_blake224_init(&S, key, keylen); hmac_blake224_update(&S, in, inlen * 8); diff --git a/xmrstak/backend/cpu/crypto/c_blake256.h b/xmrstak/backend/cpu/crypto/c_blake256.h index b9c2aad..06c7917 100644 --- a/xmrstak/backend/cpu/crypto/c_blake256.h +++ b/xmrstak/backend/cpu/crypto/c_blake256.h @@ -17,27 +17,27 @@ typedef struct { void blake256_init(state *); void blake224_init(state *); -void blake256_update(state *, const uint8_t *, uint64_t); -void blake224_update(state *, const uint8_t *, uint64_t); +void blake256_update(state *, const uint8_t *, uint32_t); +void blake224_update(state *, const uint8_t *, uint32_t); void blake256_final(state *, uint8_t *); void blake224_final(state *, uint8_t *); -void blake256_hash(uint8_t *, const uint8_t *, uint64_t); -void blake224_hash(uint8_t *, const uint8_t *, uint64_t); +void blake256_hash(uint8_t *, const uint8_t *, uint32_t); +void blake224_hash(uint8_t *, const uint8_t *, uint32_t); /* HMAC functions: */ void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t); void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t); -void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t); -void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t); +void hmac_blake256_update(hmac_state *, const uint8_t *, uint32_t); +void hmac_blake224_update(hmac_state *, const uint8_t *, uint32_t); void hmac_blake256_final(hmac_state *, uint8_t *); void hmac_blake224_final(hmac_state *, uint8_t *); -void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); -void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); +void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t); +void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t); #endif /* _BLAKE256_H_ */ diff --git a/xmrstak/backend/cpu/crypto/c_skein.h b/xmrstak/backend/cpu/crypto/c_skein.h index 6165a2a..86dbc08 100644 --- a/xmrstak/backend/cpu/crypto/c_skein.h +++ b/xmrstak/backend/cpu/crypto/c_skein.h @@ -37,7 +37,7 @@ typedef enum } SkeinHashReturn; -typedef size_t SkeinDataLength; /* bit count type */ +typedef uint32_t SkeinDataLength; /* bit count type */ typedef u08b_t SkeinBitSequence; /* bit stream type */ /* "all-in-one" call */ diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 5203de8..c7d28e9 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -43,7 +43,7 @@ extern "C" { void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen); void keccakf(uint64_t st[25], int rounds); - extern void(*const extra_hashes[4])(const void *, size_t, char *); + extern void(*const extra_hashes[4])(const void *, uint32_t, char *); } // This will shift and xor tmp1 into itself as 4 32-bit vals such as diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index ac696dd..3ff3cb9 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -56,23 +56,23 @@ extern "C" #include #endif // _WIN32 -void do_blake_hash(const void* input, size_t len, char* output) { +void do_blake_hash(const void* input, uint32_t len, char* output) { blake256_hash((uint8_t*)output, (const uint8_t*)input, len); } -void do_groestl_hash(const void* input, size_t len, char* output) { +void do_groestl_hash(const void* input, uint32_t len, char* output) { groestl((const uint8_t*)input, len * 8, (uint8_t*)output); } -void do_jh_hash(const void* input, size_t len, char* output) { +void do_jh_hash(const void* input, uint32_t len, char* output) { jh_hash(32 * 8, (const uint8_t*)input, 8 * len, (uint8_t*)output); } -void do_skein_hash(const void* input, size_t len, char* output) { +void do_skein_hash(const void* input, uint32_t len, char* output) { skein_hash(8 * 32, (const uint8_t*)input, 8 * len, (uint8_t*)output); } -void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; +void (* const extra_hashes[4])(const void *, uint32_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; #ifdef _WIN32 #include "xmrstak/misc/uac.hpp" diff --git a/xmrstak/backend/cpu/crypto/hash.h b/xmrstak/backend/cpu/crypto/hash.h index c12d355..2af3309 100644 --- a/xmrstak/backend/cpu/crypto/hash.h +++ b/xmrstak/backend/cpu/crypto/hash.h @@ -1,5 +1,7 @@ #pragma once +#include + typedef unsigned char BitSequence; -typedef unsigned long long DataLength; +typedef uint32_t DataLength; typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn; -- cgit v1.1 From 01ebc092d274d5244d2375d049f3baf91750f2a6 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 14 Apr 2018 22:09:39 +0200 Subject: fix a few conversion warnings - fix conversion from large type tp small --- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 2 +- xmrstak/backend/cpu/hwlocMemory.cpp | 2 +- xmrstak/backend/cpu/minethd.cpp | 10 +++++----- xmrstak/backend/cpu/minethd.hpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index c7d28e9..7562de1 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -429,7 +429,7 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); uint64_t vh = _mm_cvtsi128_si64(tmp); - uint8_t x = vh >> 24; + uint8_t x = static_cast(vh >> 24); static const uint16_t table = 0x7531; const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; vh ^= ((table >> index) & 0x3) << 28; diff --git a/xmrstak/backend/cpu/hwlocMemory.cpp b/xmrstak/backend/cpu/hwlocMemory.cpp index 94d2b53..089570f 100644 --- a/xmrstak/backend/cpu/hwlocMemory.cpp +++ b/xmrstak/backend/cpu/hwlocMemory.cpp @@ -30,7 +30,7 @@ void bindMemoryToNUMANode( size_t puId ) depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); - for( size_t i = 0; + for( uint32_t i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++ ) { diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index ec3e145..263c83a 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -627,22 +627,22 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, void minethd::double_work_main() { - multiway_work_main<2>(); + multiway_work_main<2u>(); } void minethd::triple_work_main() { - multiway_work_main<3>(); + multiway_work_main<3u>(); } void minethd::quad_work_main() { - multiway_work_main<4>(); + multiway_work_main<4u>(); } void minethd::penta_work_main() { - multiway_work_main<5>(); + multiway_work_main<5u>(); } template @@ -656,7 +656,7 @@ void minethd::prep_multiway_work(uint8_t *bWorkBlob, uint32_t **piNonce) } } -template +template void minethd::multiway_work_main() { if(affinity >= 0) //-1 means no affinity diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 59583b6..85a95d1 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -35,7 +35,7 @@ private: minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity); - template + template void multiway_work_main(); template -- cgit v1.1 From 4682b28a5d304436ca20469e5089f97814f3f4ab Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 14 Apr 2018 23:17:33 +0200 Subject: allow non AMD OpenCL driver and devices - add CLI flag to explicitly use non AMD OpenCL and devices - adjust OpenCL output (use OpenCL instead of AMD if --altOpenCL is sued) - optimize NVIDIA OpenCL auto suggestion --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 30 ++++++++++++++++++++++-------- xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 + xmrstak/backend/amd/autoAdjust.hpp | 23 ++++++++++++++++++++--- xmrstak/backend/amd/minethd.cpp | 6 ++++-- xmrstak/backend/backendConnector.cpp | 5 +++-- 5 files changed, 50 insertions(+), 15 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 79e80bd..9a4ba73 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -675,11 +675,18 @@ std::vector getAMDDevices(int index) } std::string devVendor(devVendorVec.data()); - if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) + + bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos; + bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos; + + std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor; + if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA")) { GpuContext ctx; std::vector devNameVec(1024); size_t maxMem; + if( devVendor.find("NVIDIA Corporation") != std::string::npos) + ctx.isNVIDIA = true; if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS) { @@ -699,6 +706,10 @@ std::vector getAMDDevices(int index) continue; } + // the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation + if(ctx.isNVIDIA) + maxMem = ctx.freeMem; + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k); @@ -747,13 +758,15 @@ int getAMDPlatformIdx() clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos || + + bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos || platformName.find("Apple") != std::string::npos || - platformName.find("Mesa") != std::string::npos - ) + platformName.find("Mesa") != std::string::npos; + bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos; + std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor; + if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA")) { - - printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); + printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str()); if(platformName.find("Mesa") != std::string::npos) mesaPlatform = i; else @@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) std::vector platformNameVec(infoSize); clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") == std::string::npos) + if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos) { printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); } @@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) { size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize; ctx[i].rawIntensity = reduced_intensity; - printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity)); + const std::string backendName = xmrstak::params::inst().openCLVendor; + printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity)); } if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index 0db6c90..5ab80b8 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -27,6 +27,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + bool isNVIDIA = false; int compMode; /*Output vars*/ diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 6df0eea..e7e98d4 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -91,6 +91,7 @@ private: std::string conf; for(auto& ctx : devVec) { + size_t minFreeMem = 128u * byteToMiB; /* 1000 is a magic selected limit, the reason is that more than 2GiB memory * sowing down the memory performance because of TLB cache misses */ @@ -112,12 +113,26 @@ private: */ maxThreads = 2024u; } + + // NVIDIA optimizations + if( + ctx.isNVIDIA && ( + ctx.name.find("P100") != std::string::npos || + ctx.name.find("V100") != std::string::npos + ) + ) + { + // do not limit the number of threads + maxThreads = 40000u; + minFreeMem = 512u * byteToMiB; + } + // increase all intensity limits by two for aeon if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) maxThreads *= 2u; // keep 128MiB memory free (value is randomly chosen) - size_t availableMem = ctx.freeMem - (128u * byteToMiB); + size_t availableMem = ctx.freeMem - minFreeMem; // 224byte extra memory is used per thread for meta data size_t perThread = hashMemSize + 224u; size_t maxIntensity = availableMem / perThread; @@ -138,7 +153,7 @@ private: // set 8 threads per block (this is a good value for the most gpus) conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n" + " \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n" " \"comp_mode\" : true\n" + " },\n"; } @@ -151,7 +166,9 @@ private: configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex)); configTpl.replace("GPUCONFIG",conf); configTpl.write(params::inst().configFileAMD); - printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str()); + + const std::string backendName = xmrstak::params::inst().openCLVendor; + printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str()); } std::vector devVec; diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index f15b480..9bc3676 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -137,6 +137,8 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor for (i = 0; i < n; i++) { jconf::inst()->GetThreadConfig(i, cfg); + + const std::string backendName = xmrstak::params::inst().openCLVendor; if(cfg.cpu_aff >= 0) { @@ -144,10 +146,10 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory."); #endif - printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff); + printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff); } else - printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i); + printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i); minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg); pvThreads->push_back(thd); diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp index acedbd6..6f80a0f 100644 --- a/xmrstak/backend/backendConnector.cpp +++ b/xmrstak/backend/backendConnector.cpp @@ -77,11 +77,12 @@ std::vector* BackendConnector::thread_starter(miner_work& pWork) #ifndef CONF_NO_OPENCL if(params::inst().useAMD) { - plugin amdplugin("AMD", "xmrstak_opencl_backend"); + const std::string backendName = xmrstak::params::inst().openCLVendor; + plugin amdplugin(backendName, "xmrstak_opencl_backend"); std::vector* amdThreads = amdplugin.startBackend(static_cast(pvThreads->size()), pWork, environment::inst()); pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads)); if(amdThreads->size() == 0) - printer::inst()->print_msg(L0, "WARNING: backend AMD disabled."); + printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str()); } #endif -- cgit v1.1 From a5ddd040a6eeb0609e1d1b0f16c1d271d31c7377 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 16 Apr 2018 17:52:42 +0200 Subject: fix wrong algo selection In the case where the dev pool mines on a higher version than a monero fork coin the miner is not resetting the algorithm. This PR select the correct algorithm each time the block version hash changed. --- xmrstak/backend/amd/minethd.cpp | 11 +++++++++-- xmrstak/backend/cpu/minethd.cpp | 22 ++++++++++++++++++---- xmrstak/backend/nvidia/minethd.cpp | 11 +++++++++-- 3 files changed, 36 insertions(+), 8 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 9bc3676..a3acf9d 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -201,6 +201,7 @@ void minethd::work_main() globalStates::inst().iConsumeCnt++; uint8_t version = 0; + size_t lastPoolId = 0; while (bQuit == 0) { @@ -219,13 +220,19 @@ void minethd::work_main() } uint8_t new_version = oWork.getVersion(); - if(new_version != version) + if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion()) + if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) { miner_algo = ::jconf::inst()->GetMiningAlgo(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } + else + { + miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + } + lastPoolId = oWork.iPoolId; version = new_version; } diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 263c83a..9494a79 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -434,6 +434,7 @@ void minethd::work_main() result.iThreadId = iThreadNo; uint8_t version = 0; + size_t lastPoolId = 0; while (bQuit == 0) { @@ -461,13 +462,19 @@ void minethd::work_main() result.iNonce = *piNonce; uint8_t new_version = oWork.getVersion(); - if(new_version != version) + if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion()) + if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) { miner_algo = ::jconf::inst()->GetMiningAlgo(); hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } + else + { + miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); + } + lastPoolId = oWork.iPoolId; version = new_version; } @@ -692,6 +699,7 @@ void minethd::multiway_work_main() auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); cn_hash_fun_multi hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); uint8_t version = 0; + size_t lastPoolId = 0; while (bQuit == 0) { @@ -718,13 +726,19 @@ void minethd::multiway_work_main() iNonce = *piNonce[0]; uint8_t new_version = oWork.getVersion(); - if(new_version != version) + if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion()) + if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) { miner_algo = ::jconf::inst()->GetMiningAlgo(); hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } + else + { + miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); + } + lastPoolId = oWork.iPoolId; version = new_version; } diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 804c06a..4593dab 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -247,6 +247,7 @@ void minethd::work_main() globalStates::inst().iConsumeCnt++; uint8_t version = 0; + size_t lastPoolId = 0; while (bQuit == 0) { @@ -264,13 +265,19 @@ void minethd::work_main() continue; } uint8_t new_version = oWork.getVersion(); - if(new_version != version) + if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion()) + if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) { miner_algo = ::jconf::inst()->GetMiningAlgo(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } + else + { + miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + } + lastPoolId = oWork.iPoolId; version = new_version; } -- cgit v1.1 From 0877e2f654b203c5145bb8154fcfb1ad46ba8265 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 16 Apr 2018 21:46:33 +0200 Subject: add independent dev pool coin description - allow the dev pool to fork on a different block version than the user descriped coin All algorithm are centered around the user coin description. It is allowed to have two two different coin algorithms in the user coin description. It is only allowed to use algorithms for the dev pool coin description those are used in the user coin description. There are two ways to define a non forking coin. - set both user coin algorithm descriptions to the same algorithm and set version to zero - set the first algorithm in the user coin description to something you like to use in the dev pool and set the second algorithm to the correct representation of the coin. Set the version to 255. This will allow that the dev pool can mine on a different coin algorithm than the not forking user coin. Do not use an algorithm with different scratchpad size for the dev pool. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 12 +++++------ xmrstak/backend/amd/autoAdjust.hpp | 6 +++--- xmrstak/backend/amd/minethd.cpp | 9 ++++---- xmrstak/backend/cpu/autoAdjust.hpp | 4 ++-- xmrstak/backend/cpu/autoAdjustHwloc.hpp | 4 ++-- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 8 +++---- xmrstak/backend/cpu/minethd.cpp | 26 ++++++++++++----------- xmrstak/backend/nvidia/minethd.cpp | 9 ++++---- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 12 +++++------ 9 files changed, 47 insertions(+), 43 deletions(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 9a4ba73..b3d36e7 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -309,8 +309,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ } size_t scratchPadSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); size_t g_thd = ctx->rawIntensity; @@ -376,8 +376,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ } xmrstak_algo miner_algo[2] = { - ::jconf::inst()->GetMiningAlgo(), - ::jconf::inst()->GetMiningAlgoRoot() + ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo(), + ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot() }; int num_algos = miner_algo[0] == miner_algo[1] ? 1 : 2; @@ -936,7 +936,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo) { // switch to the kernel storage - int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1; + int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1; cl_int ret; @@ -1101,7 +1101,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) { // switch to the kernel storage - int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1; + int kernel_storage = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1; cl_int ret; cl_uint zero = 0; diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index e7e98d4..685890b 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -84,8 +84,8 @@ private: constexpr size_t byteToMiB = 1024u * 1024u; size_t hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); std::string conf; @@ -128,7 +128,7 @@ private: } // increase all intensity limits by two for aeon - if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) + if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite) maxThreads *= 2u; // keep 128MiB memory free (value is randomly chosen) diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index a3acf9d..4353e3d 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -195,7 +195,7 @@ void minethd::work_main() cpu_ctx = cpu::minethd::minethd_alloc_ctx(); // start with root algorithm and switch later if fork version is reached - auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); globalStates::inst().iConsumeCnt++; @@ -222,14 +222,15 @@ void minethd::work_main() uint8_t new_version = oWork.getVersion(); if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) + coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId); + if(new_version >= coinDesc.GetMiningForkVersion()) { - miner_algo = ::jconf::inst()->GetMiningAlgo(); + miner_algo = coinDesc.GetMiningAlgo(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } else { - miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + miner_algo = coinDesc.GetMiningAlgoRoot(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } lastPoolId = oWork.iPoolId; diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index ed96d8b..518721a 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -37,8 +37,8 @@ public: { const size_t hashMemSizeKB = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ) / 1024u; const size_t halfHashMemSizeKB = hashMemSizeKB / 2u; diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index f110ee3..b1f3914 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -29,8 +29,8 @@ public: autoAdjust() { hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); halfHashMemSize = hashMemSize / 2u; } diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 3ff3cb9..ee3b663 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -204,8 +204,8 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) { size_t hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096); @@ -283,8 +283,8 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al void cryptonight_free_ctx(cryptonight_ctx* ctx) { size_t hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); if(ctx->ctx_info[0] != 0) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 9494a79..f8f70f9 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -231,7 +231,7 @@ bool minethd::self_test() bool bResult = true; - if(::jconf::inst()->GetMiningAlgo() == cryptonight) + if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight) { unsigned char out[32 * MAX_N]; cn_hash_fun hashf; @@ -276,13 +276,13 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; } - else if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite) { } - else if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero) + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_monero) { } - else if(::jconf::inst()->GetMiningAlgo() == cryptonight_aeon) + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon) { } @@ -424,7 +424,7 @@ void minethd::work_main() job_result result; // start with root algorithm and switch later if fork version is reached - auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); cn_hash_fun hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); ctx = minethd_alloc_ctx(); @@ -464,14 +464,15 @@ void minethd::work_main() uint8_t new_version = oWork.getVersion(); if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) + coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId); + if(new_version >= coinDesc.GetMiningForkVersion()) { - miner_algo = ::jconf::inst()->GetMiningAlgo(); + miner_algo = coinDesc.GetMiningAlgo(); hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } else { - miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + miner_algo = coinDesc.GetMiningAlgoRoot(); hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } lastPoolId = oWork.iPoolId; @@ -696,7 +697,7 @@ void minethd::multiway_work_main() globalStates::inst().iConsumeCnt++; // start with root algorithm and switch later if fork version is reached - auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); cn_hash_fun_multi hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); uint8_t version = 0; size_t lastPoolId = 0; @@ -728,14 +729,15 @@ void minethd::multiway_work_main() uint8_t new_version = oWork.getVersion(); if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) + coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId); + if(new_version >= coinDesc.GetMiningForkVersion()) { - miner_algo = ::jconf::inst()->GetMiningAlgo(); + miner_algo = coinDesc.GetMiningAlgo(); hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } else { - miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + miner_algo = coinDesc.GetMiningAlgoRoot(); hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo); } lastPoolId = oWork.iPoolId; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 4593dab..92f5f78 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -239,7 +239,7 @@ void minethd::work_main() cpu_ctx = cpu::minethd::minethd_alloc_ctx(); // start with root algorithm and switch later if fork version is reached - auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); uint32_t iNonce; @@ -267,14 +267,15 @@ void minethd::work_main() uint8_t new_version = oWork.getVersion(); if(new_version != version || oWork.iPoolId != lastPoolId) { - if(new_version >= ::jconf::inst()->GetMiningForkVersion() || oWork.iPoolId == 0) + coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId); + if(new_version >= coinDesc.GetMiningForkVersion()) { - miner_algo = ::jconf::inst()->GetMiningAlgo(); + miner_algo = coinDesc.GetMiningAlgo(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } else { - miner_algo = ::jconf::inst()->GetMiningAlgoRoot(); + miner_algo = coinDesc.GetMiningAlgoRoot(); hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } lastPoolId = oWork.iPoolId; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index f016ef4..f192f01 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -280,14 +280,14 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); size_t hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize; - if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo()) + if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) { // extent ctx_b to hold the state of idx0 ctx_b_size += sizeof(uint32_t) * wsize; @@ -580,8 +580,8 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) ctx->free_device_memory = freeMemory; size_t hashMemSize = std::max( - cn_select_memory(::jconf::inst()->GetMiningAlgo()), - cn_select_memory(::jconf::inst()->GetMiningAlgoRoot()) + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()), + cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) ); #ifdef WIN32 @@ -612,7 +612,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) // up to 16kibyte extra memory is used per thread for some kernel (lmem/local memory) // 680bytes are extra meta data memory per hash size_t perThread = hashMemSize + 16192u + 680u; - if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo()) + if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) perThread += 50 * 4; // state double buffer size_t max_intensity = limitedMemory / perThread; -- cgit v1.1 From 053dcb01d814daab686d66ba82da96ce2aec7747 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 16 Apr 2018 22:06:38 +0200 Subject: remove fork for sumokoin and monero remove fork version for sumokoin and monero7 --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xmrstak/backend') diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index b3d36e7..03100d0 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -1101,7 +1101,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) { // switch to the kernel storage - int kernel_storage = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1; + int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1; cl_int ret; cl_uint zero = 0; -- cgit v1.1