diff options
Diffstat (limited to 'xmrstak')
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 121 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/autoAdjust.hpp | 2 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/config.tpl | 10 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/jconf.cpp | 11 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/jconf.hpp | 1 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/minethd.cpp | 1 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp | 3 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 17 | ||||
-rw-r--r-- | xmrstak/cli/cli-miner.cpp | 2 | ||||
-rw-r--r-- | xmrstak/misc/executor.cpp | 24 | ||||
-rw-r--r-- | xmrstak/net/jpsock.hpp | 4 |
11 files changed, 137 insertions, 59 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 42f6388..d9bc962 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -449,68 +449,85 @@ uint32_t getNumPlatforms() std::vector<GpuContext> getAMDDevices(int index) { std::vector<GpuContext> ctxVec; - cl_platform_id * platforms = NULL; + std::vector<cl_platform_id> platforms; + std::vector<cl_device_id> device_list; + cl_int clStatus; cl_uint num_devices; - cl_device_id *device_list = NULL; - uint32_t numPlatforms = getNumPlatforms(); - if(numPlatforms) + if(numPlatforms == 0) + return ctxVec; + + platforms.resize(numPlatforms); + if((clStatus = clGetPlatformIDs(numPlatforms, platforms.data(), NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); + return ctxVec; + } + + if((clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus)); + return ctxVec; + } + + device_list.resize(num_devices); + if((clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list.data(), NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus)); + return ctxVec; + } + + for (size_t k = 0; k < num_devices; k++) { - platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); - clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); - if(clStatus == CL_SUCCESS) + std::vector<char> devVendorVec(1024); + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL)) != CL_SUCCESS) { - clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); - if(clStatus == CL_SUCCESS) + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name for device %u.", err_to_str(clStatus), k); + continue; + } + + std::string devVendor(devVendorVec.data()); + if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) + { + GpuContext ctx; + std::vector<char> devNameVec(1024); + size_t maxMem; + + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_COMPUTE_UNITS for device %u.", err_to_str(clStatus), k); + continue; + } + + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL)) != CL_SUCCESS) { - device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); - clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); - if(clStatus == CL_SUCCESS) - { - for (int k = 0; k < num_devices; k++) - { - cl_int clError; - std::vector<char> devVendorVec(1024); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL); - if(clStatus == CL_SUCCESS) - { - std::string devVendor(devVendorVec.data()); - if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) - { - GpuContext ctx; - ctx.deviceIdx = k; - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL); - size_t maxMem; - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL); - // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory - ctx.freeMem = std::min(ctx.freeMem, maxMem); - std::vector<char> devNameVec(1024); - clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL); - ctx.name = std::string(devNameVec.data()); - printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); - ctx.DeviceID = device_list[k]; - ctxVec.push_back(ctx); - } - } - else - printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name.", err_to_str(clStatus)); - } - } - else - printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus)); - free(device_list); + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_MEM_ALLOC_SIZE for device %u.", err_to_str(clStatus), k); + continue; } - else - printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus)); + + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_GLOBAL_MEM_SIZE for device %u.", err_to_str(clStatus), k); + continue; + } + + if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k); + continue; + } + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); + + // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory + ctx.deviceIdx = k; + ctx.freeMem = std::min(ctx.freeMem, maxMem); + ctx.name = std::string(devNameVec.data()); + ctx.DeviceID = device_list[k]; + ctxVec.push_back(ctx); } - else - printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); } - - free(platforms); return ctxVec; } diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp index c6a7dca..be7d1ce 100644 --- a/xmrstak/backend/nvidia/autoAdjust.hpp +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -95,7 +95,7 @@ private: conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" + " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" + " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" + - " \"affine_to_cpu\" : false,\n" + + " \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" + " },\n"; } } diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl index 99dc023..5479172 100644 --- a/xmrstak/backend/nvidia/config.tpl +++ b/xmrstak/backend/nvidia/config.tpl @@ -9,6 +9,12 @@ R"===( * bsleep - Insert a delay of X microseconds between kernel launches. * Increase if you want to reduce GPU lag. Recommended setting on GUI systems - 100 * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. + * sync_mode - method used to synchronize the device + * documentation: http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac + * 0 = cudaDeviceScheduleAuto + * 1 = cudaDeviceScheduleSpin - create a high load on one cpu thread per gpu + * 2 = cudaDeviceScheduleYield + * 3 = cudaDeviceScheduleBlockingSync (default) * * On the first run the miner will look at your system and suggest a basic configuration that will work, * you can try to tweak it from there to get the best performance. @@ -16,7 +22,9 @@ R"===( * A filled out configuration should look like this: * "gpu_threads_conf" : * [ - * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false}, + * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, + * "affine_to_cpu" : false, "sync_mode" : 3, + * }, * ], */ diff --git a/xmrstak/backend/nvidia/jconf.cpp b/xmrstak/backend/nvidia/jconf.cpp index 4208145..46c5726 100644 --- a/xmrstak/backend/nvidia/jconf.cpp +++ b/xmrstak/backend/nvidia/jconf.cpp @@ -123,16 +123,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff; + const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff, *syncMode; gid = GetObjectMember(oThdConf, "index"); blocks = GetObjectMember(oThdConf, "blocks"); threads = GetObjectMember(oThdConf, "threads"); bfactor = GetObjectMember(oThdConf, "bfactor"); bsleep = GetObjectMember(oThdConf, "bsleep"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); + syncMode = GetObjectMember(oThdConf, "sync_mode"); if(gid == nullptr || blocks == nullptr || threads == nullptr || - bfactor == nullptr || bsleep == nullptr || aff == nullptr) + bfactor == nullptr || bsleep == nullptr || aff == nullptr || syncMode == nullptr) { return false; } @@ -155,11 +156,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) if(!aff->IsUint64() && !aff->IsBool()) return false; + if(!syncMode->IsNumber() || syncMode->GetInt() < 0 || syncMode->GetInt() > 3) + { + printer::inst()->print_msg(L0, "Error NVIDIA: sync_mode out of range or no number. ( range: 0 <= sync_mode < 4.)"); + return false; + } cfg.id = gid->GetInt(); cfg.blocks = blocks->GetInt(); cfg.threads = threads->GetInt(); cfg.bfactor = bfactor->GetInt(); cfg.bsleep = bsleep->GetInt(); + cfg.syncMode = syncMode->GetInt(); if(aff->IsNumber()) cfg.cpu_aff = aff->GetInt(); diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp index b09a162..7f60f1d 100644 --- a/xmrstak/backend/nvidia/jconf.hpp +++ b/xmrstak/backend/nvidia/jconf.hpp @@ -28,6 +28,7 @@ public: bool bDoubleMode; bool bNoPrefetch; int32_t cpu_aff; + int syncMode; long long iCpuAff; }; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 9eab1c0..6e628fd 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -77,6 +77,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) ctx.device_threads = (int)cfg.threads; ctx.device_bfactor = (int)cfg.bfactor; ctx.device_bsleep = (int)cfg.bsleep; + ctx.syncMode = cfg.syncMode; this->affinity = cfg.cpu_aff; std::unique_lock<std::mutex> lck(thd_aff_set); diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp index 1b63379..afbdbaf 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -11,7 +11,8 @@ typedef struct { int device_blocks; int device_threads; int device_bfactor; - int device_bsleep; + int device_bsleep; + int syncMode; uint32_t *d_input; uint32_t inputlen; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 333ae73..0fc99a4 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -189,7 +189,22 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) } cudaDeviceReset(); - cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + switch(ctx->syncMode) + { + case 0: + cudaSetDeviceFlags(cudaDeviceScheduleAuto); + break; + case 1: + cudaSetDeviceFlags(cudaDeviceScheduleSpin); + break; + case 2: + cudaSetDeviceFlags(cudaDeviceScheduleYield); + break; + case 3: + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + break; + + }; cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); size_t hashMemSize; diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index bc52923..546d226 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -85,7 +85,7 @@ void help() cout<<" --nvidia FILE NVIDIA backend miner config file"<<endl; #endif cout<<" "<<endl; - cout<<"The Following options temporary overwrites the config file settings:"<<endl; + cout<<"The Following options temporary overwrites the config entries of \nthe pool with the highest weight:"<<endl; cout<<" -o, --url URL pool url and port, e.g. pool.usxmrpool.com:3333"<<endl; cout<<" -u, --user USERNAME pool user name or wallet address"<<endl; cout<<" -p, --pass PASSWD pool password, in the most cases x or empty \"\""<<endl; diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index c500b21..6f34d80 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -528,6 +528,30 @@ void executor::ex_main() pools.emplace_front(0, "donate.xmr-stak.net:4444", "", "", 0.0, true, false, "", true); } + /* find the pool with the highest weighting to allow overwriting of the + * pool settings via command line options. + */ + std::vector<jpsock*> sorted_pools; + sorted_pools.reserve(pools.size()); + for(jpsock& pool : pools) + sorted_pools.emplace_back(&pool); + std::sort(sorted_pools.begin(), sorted_pools.end(), [](jpsock* a, jpsock* b) { return b->get_pool_weight(true) < a->get_pool_weight(true); }); + + // overwrite pool address if cli option is used + auto& poolURL = xmrstak::params::inst().poolURL; + if(!poolURL.empty()) + { + sorted_pools[0]->set_pool_addr(poolURL.c_str()); + } + // overwrite user pool login name if cli option is used + auto& poolUsername = xmrstak::params::inst().poolUsername; + if(!poolUsername.empty()) + sorted_pools[0]->set_user_login(poolUsername.c_str()); + // overwrite user pool login password if cli option is used + auto& poolPasswd = xmrstak::params::inst().poolPasswd; + if(!poolPasswd.empty()) + sorted_pools[0]->set_user_passwd(poolPasswd.c_str()); + ex_event ev; std::thread clock_thd(&executor::ex_clock_thd, this); diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp index 9d276b7..ba5d1c8 100644 --- a/xmrstak/net/jpsock.hpp +++ b/xmrstak/net/jpsock.hpp @@ -59,6 +59,10 @@ public: inline const char* get_tls_fp() { return tls_fp.c_str(); } inline bool is_nicehash() { return nicehash; } + inline void set_pool_addr(const char* sAddr) { net_addr = sAddr; } + inline void set_user_login(const char* sLogin) { usr_login = sLogin; } + inline void set_user_passwd(const char* sPassword) { usr_pass = sPassword; } + bool get_pool_motd(std::string& strin); std::string&& get_call_error(); |