From 4874d6817fb5c973b57e88eeb9f55fede60d6281 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Sat, 23 Dec 2017 15:21:48 +0000 Subject: Clearer TLS error --- xmrstak/net/socket.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/net/socket.cpp b/xmrstak/net/socket.cpp index e19d1d4..89e9902 100644 --- a/xmrstak/net/socket.cpp +++ b/xmrstak/net/socket.cpp @@ -190,7 +190,7 @@ void tls_socket::print_error() if(jconf::inst()->TlsSecureAlgos()) pCallback->set_socket_error("Unknown TLS error. Secure TLS maybe unsupported, try setting tls_secure_algo to false."); else - pCallback->set_socket_error("Unknown TLS error."); + pCallback->set_socket_error("Unknown TLS error. You might be trying to connect to a non-TLS port."); } else pCallback->set_socket_error(buf, len); -- cgit v1.1 From 3216b47fdacf701942457b23341f483aadbe5eb7 Mon Sep 17 00:00:00 2001 From: dangrabbits Date: Mon, 25 Dec 2017 09:32:26 +0800 Subject: Updated config.tpl comments to include how to exclude CPU/GPUs --- xmrstak/backend/amd/config.tpl | 3 +++ xmrstak/backend/cpu/config.tpl | 3 +++ xmrstak/backend/nvidia/config.tpl | 3 +++ 3 files changed, 9 insertions(+) diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index af662f8..25b75a1 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -12,6 +12,9 @@ R"===( * [ * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true }, * ], + * If you do not wish to mine with your AMD GPU(s) then use: + * "gpu_threads_conf" : + * null, */ "gpu_threads_conf" : [ diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl index b21a22d..fc4acb9 100644 --- a/xmrstak/backend/cpu/config.tpl +++ b/xmrstak/backend/cpu/config.tpl @@ -24,6 +24,9 @@ R"===( * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 }, * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 }, * ], + * If you do not wish to mine 
with your CPU(s) then use: + * "cpu_threads_conf" : + * null, */ "cpu_threads_conf" : diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl index 5479172..f489956 100644 --- a/xmrstak/backend/nvidia/config.tpl +++ b/xmrstak/backend/nvidia/config.tpl @@ -26,6 +26,9 @@ R"===( * "affine_to_cpu" : false, "sync_mode" : 3, * }, * ], + * If you do not wish to mine with your nVidia GPU(s) then use: + * "gpu_threads_conf" : + * null, */ "gpu_threads_conf" : -- cgit v1.1 From 9201a73269ad057d93d7c850aa68336b264f5468 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Mon, 25 Dec 2017 14:54:30 +0000 Subject: Add option to enable web interface from cli --- xmrstak/cli/cli-miner.cpp | 58 ++++++++++++++++++++++++++++++++++++++++++++--- xmrstak/config.tpl | 2 +- xmrstak/jconf.cpp | 5 +++- xmrstak/params.hpp | 4 ++++ 4 files changed, 64 insertions(+), 5 deletions(-) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index b84b783..077d463 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -86,6 +86,9 @@ void help() cout<<" --noNVIDIA disable the NVIDIA miner backend"<> port) || port < 0 || port > 65535) + { + std::cin.clear(); + std::cin.ignore(INT_MAX, '\n'); + std::cout << "Invalid port number. Please enter a number between 0 and 65535." 
<< std::endl; + } + + http_port = port; +#endif + } + auto& pool = params::inst().poolURL; bool userSetPool = true; if(pool.empty()) @@ -306,6 +331,7 @@ void do_guided_config() configTpl.replace("POOLCONF", pool_table); configTpl.replace("CURRENCY", currency); + configTpl.replace("HTTP_PORT", std::to_string(http_port)); configTpl.write(params::inst().configFile); std::cout<<"Configuration stored in file '"<=argc ) + { + printer::inst()->print_msg(L0, "No argument for parameter '-i/--httpd' given"); + win_exit(); + return 1; + } + + char* endp = nullptr; + long int ret = strtol(argv[i], &endp, 10); + + if(endp == nullptr || ret < 0 || ret > 65535) + { + printer::inst()->print_msg(L0, "Argument for parameter '-i/--httpd' must be a number between 0 and 65535"); + win_exit(); + return 1; + } + + params::inst().httpd_port = ret; + } else if(opName.compare("--noUAC") == 0) { uacDialog = false; @@ -546,16 +594,20 @@ int main(int argc, char *argv[]) return 1; } -#ifndef CONF_NO_HTTPD - if(jconf::inst()->GetHttpdPort() != 0) + if(jconf::inst()->GetHttpdPort() != uint16_t(params::httpd_port_disabled)) { +#ifdef CONF_NO_HTTPD + printer::inst()->print_msg(L0, "HTTPD port is enabled but this binary was compiled without HTTP support!"); + win_exit(); + return 1; +#else if (!httpd::inst()->start_daemon()) { win_exit(); return 1; } - } #endif + } printer::inst()->print_str("-------------------------------------------------------------------\n"); printer::inst()->print_str(get_version_str_short().c_str()); diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl index ae97190..2c7bd41 100644 --- a/xmrstak/config.tpl +++ b/xmrstak/config.tpl @@ -159,7 +159,7 @@ POOLCONF], * * httpd_port - Port we should listen on. Default, 0, will switch off the server. 
*/ -"httpd_port" : 0, +"httpd_port" : HTTP_PORT, /* * HTTP Authentication diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index f279f52..a1db451 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -242,7 +242,10 @@ uint64_t jconf::GetAutohashTime() uint16_t jconf::GetHttpdPort() { - return prv->configValues[iHttpdPort]->GetUint(); + if(xmrstak::params::inst().httpd_port == xmrstak::params::httpd_port_unset) + return prv->configValues[iHttpdPort]->GetUint(); + else + return uint16_t(xmrstak::params::inst().httpd_port); } const char* jconf::GetHttpUsername() diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp index bc32612..62cce47 100644 --- a/xmrstak/params.hpp +++ b/xmrstak/params.hpp @@ -31,6 +31,10 @@ struct params std::string poolUsername; bool nicehashMode = false; + static constexpr int32_t httpd_port_unset = -1; + static constexpr int32_t httpd_port_disabled = 0; + int32_t httpd_port = httpd_port_unset; + std::string currency; std::string configFile; -- cgit v1.1 From 038a4eb34d40abac8d0242f9ca881b0dac344723 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Mon, 25 Dec 2017 16:36:44 +0000 Subject: On-demand elevation --- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 4 +++ xmrstak/cli/cli-miner.cpp | 31 +++++++++++------------ xmrstak/misc/uac.hpp | 20 +++++++++++++-- xmrstak/params.hpp | 4 +++ 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 8b2207d..583deff 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -73,6 +73,8 @@ void do_skein_hash(const void* input, size_t len, char* output) { void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; #ifdef _WIN32 +#include "xmrstak/misc/uac.hpp" + BOOL bRebootDesirable = FALSE; //If VirtualAlloc fails, suggest a reboot BOOL AddPrivilege(TCHAR* 
pszPrivilege) @@ -176,6 +178,8 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) if(AddPrivilege(TEXT("SeLockMemoryPrivilege")) == 0) { + RequestElevation(); + if(AddLargePageRights()) { msg->warning = "Added SeLockMemoryPrivilege to the current account. You need to reboot for it to work"; diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index b84b783..4904604 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -342,7 +342,14 @@ int main(int argc, char *argv[]) params::inst().executablePrefix += seperator; } - bool uacDialog = true; + params::inst().minerArg0 = argv[0]; + params::inst().minerArgs.reserve(argc * 16); + for(int i = 1; i < argc; i++) + { + params::inst().minerArgs += " "; + params::inst().minerArgs += argv[i]; + } + bool pool_url_set = false; for(size_t i = 1; i < argc-1; i++) { @@ -506,7 +513,7 @@ int main(int argc, char *argv[]) } else if(opName.compare("--noUAC") == 0) { - uacDialog = false; + params::inst().allowUAC = false; } else { @@ -516,20 +523,6 @@ int main(int argc, char *argv[]) } } -#ifdef _WIN32 - if(uacDialog && !IsElevated()) - { - std::string minerArgs; - for(int i = 1; i < argc; i++) - { - minerArgs += " "; - minerArgs += argv[i]; - } - - SelfElevate(argv[0], minerArgs); - } -#endif - // check if we need a guided start if(!configEditor::file_exist(params::inst().configFile)) do_guided_config(); @@ -540,6 +533,12 @@ int main(int argc, char *argv[]) return 1; } +#ifdef _WIN32 + /* For Windows 7 and 8 request elevation at all times unless we are using slow memory */ + if(jconf::inst()->GetSlowMemSetting() != jconf::slow_mem_cfg::always_use && LOBYTE(LOWORD(GetVersion())) < 10) + RequestElevation(); +#endif + if (!BackendConnector::self_test()) { win_exit(); diff --git a/xmrstak/misc/uac.hpp b/xmrstak/misc/uac.hpp index 55c5f1a..4fb5b0c 100644 --- a/xmrstak/misc/uac.hpp +++ b/xmrstak/misc/uac.hpp @@ -2,6 +2,7 @@ #ifdef _WIN32 #include "xmrstak/misc/console.hpp" 
+#include "xmrstak/params.hpp" #include #include @@ -22,7 +23,7 @@ BOOL IsElevated() return fRet; } -BOOL SelfElevate(const char* my_path, const std::string& params) +BOOL SelfElevate(const std::string& my_path, const std::string& params) { if (IsElevated()) return FALSE; @@ -32,7 +33,7 @@ BOOL SelfElevate(const char* my_path, const std::string& params) shExecInfo.fMask = SEE_MASK_NOCLOSEPROCESS; shExecInfo.hwnd = NULL; shExecInfo.lpVerb = "runas"; - shExecInfo.lpFile = my_path; + shExecInfo.lpFile = my_path.c_str(); shExecInfo.lpParameters = params.c_str(); shExecInfo.lpDirectory = NULL; shExecInfo.nShow = SW_SHOW; @@ -48,4 +49,19 @@ BOOL SelfElevate(const char* my_path, const std::string& params) return TRUE; } + +VOID RequestElevation() +{ + if(IsElevated()) + return; + + if(!xmrstak::params::inst().allowUAC) + { + printer::inst()->print_msg(L0, "The miner needs to run as administrator, but you passed --noUAC option. Please remove it or set use_slow_memory to always."); + win_exit(); + return; + } + + SelfElevate(xmrstak::params::inst().minerArg0, xmrstak::params::inst().minerArgs); +} #endif diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp index bc32612..4cc041c 100644 --- a/xmrstak/params.hpp +++ b/xmrstak/params.hpp @@ -38,6 +38,10 @@ struct params std::string configFileNVIDIA; std::string configFileCPU; + bool allowUAC = true; + std::string minerArg0; + std::string minerArgs; + params() : binaryName("xmr-stak"), executablePrefix(""), -- cgit v1.1 From 5e5888bff84eb98932df1852ef57ff6ebfc0be56 Mon Sep 17 00:00:00 2001 From: Unknown Date: Mon, 25 Dec 2017 17:30:52 +0000 Subject: fix windows build --- xmrstak/misc/uac.cpp | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++ xmrstak/misc/uac.hpp | 64 +++---------------------------------------------- 2 files changed, 70 insertions(+), 61 deletions(-) create mode 100644 xmrstak/misc/uac.cpp diff --git a/xmrstak/misc/uac.cpp b/xmrstak/misc/uac.cpp new file mode 100644 index 0000000..4fb5b0c --- /dev/null 
+++ b/xmrstak/misc/uac.cpp @@ -0,0 +1,67 @@ +#pragma once + +#ifdef _WIN32 +#include "xmrstak/misc/console.hpp" +#include "xmrstak/params.hpp" + +#include +#include + +BOOL IsElevated() +{ + BOOL fRet = FALSE; + HANDLE hToken = NULL; + if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) + { + TOKEN_ELEVATION Elevation; + DWORD cbSize = sizeof(TOKEN_ELEVATION); + if (GetTokenInformation(hToken, TokenElevation, &Elevation, sizeof(Elevation), &cbSize)) + fRet = Elevation.TokenIsElevated; + } + if (hToken) + CloseHandle(hToken); + return fRet; +} + +BOOL SelfElevate(const std::string& my_path, const std::string& params) +{ + if (IsElevated()) + return FALSE; + + SHELLEXECUTEINFO shExecInfo = { 0 }; + shExecInfo.cbSize = sizeof(SHELLEXECUTEINFO); + shExecInfo.fMask = SEE_MASK_NOCLOSEPROCESS; + shExecInfo.hwnd = NULL; + shExecInfo.lpVerb = "runas"; + shExecInfo.lpFile = my_path.c_str(); + shExecInfo.lpParameters = params.c_str(); + shExecInfo.lpDirectory = NULL; + shExecInfo.nShow = SW_SHOW; + shExecInfo.hInstApp = NULL; + + if (!ShellExecuteEx(&shExecInfo)) + return FALSE; + + // Loiter in the background to make scripting easier + printer::inst()->print_msg(L0, "This window has been opened because xmr-stak needed to run as administrator. It can be safely closed now."); + WaitForSingleObject(shExecInfo.hProcess, INFINITE); + std::exit(0); + + return TRUE; +} + +VOID RequestElevation() +{ + if(IsElevated()) + return; + + if(!xmrstak::params::inst().allowUAC) + { + printer::inst()->print_msg(L0, "The miner needs to run as administrator, but you passed --noUAC option. 
Please remove it or set use_slow_memory to always."); + win_exit(); + return; + } + + SelfElevate(xmrstak::params::inst().minerArg0, xmrstak::params::inst().minerArgs); +} +#endif diff --git a/xmrstak/misc/uac.hpp b/xmrstak/misc/uac.hpp index 4fb5b0c..82cdf42 100644 --- a/xmrstak/misc/uac.hpp +++ b/xmrstak/misc/uac.hpp @@ -1,67 +1,9 @@ #pragma once #ifdef _WIN32 -#include "xmrstak/misc/console.hpp" -#include "xmrstak/params.hpp" - #include -#include - -BOOL IsElevated() -{ - BOOL fRet = FALSE; - HANDLE hToken = NULL; - if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) - { - TOKEN_ELEVATION Elevation; - DWORD cbSize = sizeof(TOKEN_ELEVATION); - if (GetTokenInformation(hToken, TokenElevation, &Elevation, sizeof(Elevation), &cbSize)) - fRet = Elevation.TokenIsElevated; - } - if (hToken) - CloseHandle(hToken); - return fRet; -} - -BOOL SelfElevate(const std::string& my_path, const std::string& params) -{ - if (IsElevated()) - return FALSE; - - SHELLEXECUTEINFO shExecInfo = { 0 }; - shExecInfo.cbSize = sizeof(SHELLEXECUTEINFO); - shExecInfo.fMask = SEE_MASK_NOCLOSEPROCESS; - shExecInfo.hwnd = NULL; - shExecInfo.lpVerb = "runas"; - shExecInfo.lpFile = my_path.c_str(); - shExecInfo.lpParameters = params.c_str(); - shExecInfo.lpDirectory = NULL; - shExecInfo.nShow = SW_SHOW; - shExecInfo.hInstApp = NULL; - - if (!ShellExecuteEx(&shExecInfo)) - return FALSE; - - // Loiter in the background to make scripting easier - printer::inst()->print_msg(L0, "This window has been opened because xmr-stak needed to run as administrator. It can be safely closed now."); - WaitForSingleObject(shExecInfo.hProcess, INFINITE); - std::exit(0); - - return TRUE; -} - -VOID RequestElevation() -{ - if(IsElevated()) - return; - - if(!xmrstak::params::inst().allowUAC) - { - printer::inst()->print_msg(L0, "The miner needs to run as administrator, but you passed --noUAC option. 
Please remove it or set use_slow_memory to always."); - win_exit(); - return; - } - SelfElevate(xmrstak::params::inst().minerArg0, xmrstak::params::inst().minerArgs); -} +BOOL IsElevated(); +BOOL SelfElevate(const std::string& my_path, const std::string& params); +VOID RequestElevation(); #endif -- cgit v1.1 From e9f30f6523c9ede02acc97146163a65817affea9 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Mon, 25 Dec 2017 18:02:03 +0000 Subject: Add a link to increase API visibility --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 86570cc..788a072 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NV - auto configuration for each backend - open source software (GPLv3) - TLS support -- HTML statistics -- JSON API for monitoring +- [HTML statistics](doc/usage.md#html-and-json-api-report-configuraton) +- [JSON API for monitoring](doc/usage.md#html-and-json-api-report-configuraton) ## Supported altcoins -- cgit v1.1 From b38aa0743813007cb4d1672b55ac334ebd472dd9 Mon Sep 17 00:00:00 2001 From: Anthony Uk Date: Mon, 25 Dec 2017 19:38:55 +0100 Subject: CPU - cryptonight_aesni.h - rearranged prefetch instructions to allow more time for cache to charge --- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 9b6e1dc..e4ccbc3 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -317,10 +317,9 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); idx0 = _mm_cvtsi128_si64(cx); - bx0 = cx; - if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0); + bx0 = cx; uint64_t hi, lo, cl, ch; cl = 
((uint64_t*)&l0[idx0 & MASK])[0]; @@ -329,15 +328,15 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c lo = _umul128(idx0, cl, &hi); al0 += hi; - ah0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = al0; + al0 ^= cl; + if(PREFETCH) + _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); + ah0 += lo; ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - if(PREFETCH) - _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0); + idx0 = al0; } // Optim - 90% time boundary -- cgit v1.1 From 4bbd172745daea2837393eb380641abda2eb1301 Mon Sep 17 00:00:00 2001 From: Thiago Dias <1681936+tvdias@users.noreply.github.com> Date: Tue, 26 Dec 2017 16:31:58 +0000 Subject: Fix on Dockerfile adding build-essential package --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3e996ef..3458387 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ENV XMRSTAK_CMAKE_FLAGS -DXMR-STAK_COMPILE=generic -DCUDA_ENABLE=ON -DOpenCL_ENA # Innstall packages RUN apt-get update \ && set -x \ - && apt-get install -qq --no-install-recommends -y ca-certificates cmake cuda-core-9-0 git cuda-cudart-dev-9-0 libhwloc-dev libmicrohttpd-dev libssl-dev \ + && apt-get install -qq --no-install-recommends -y build-essential ca-certificates cmake cuda-core-9-0 git cuda-cudart-dev-9-0 libhwloc-dev libmicrohttpd-dev libssl-dev \ && git clone $GIT_REPOSITORY \ && cd /xmr-stak \ && cmake ${XMRSTAK_CMAKE_FLAGS} . 
\ @@ -16,7 +16,7 @@ RUN apt-get update \ && cd - \ && mv /xmr-stak/bin/* /usr/local/bin/ \ && rm -rf /xmr-stak \ - && apt-get purge -y -qq cmake cuda-core-9-0 git cuda-cudart-dev-9-0 libhwloc-dev libmicrohttpd-dev libssl-dev \ + && apt-get purge -y -qq build-essential cmake cuda-core-9-0 git cuda-cudart-dev-9-0 libhwloc-dev libmicrohttpd-dev libssl-dev \ && apt-get clean -qq VOLUME /mnt -- cgit v1.1 From b216f39a52d87e48b2399da5e3272a9a464ac359 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Tue, 26 Dec 2017 21:11:22 +0000 Subject: Busywork courtesy of Microsoft Add messages Missing include 1 --- CMakeLists.txt | 10 ++++--- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 2 ++ xmrstak/cli/cli-miner.cpp | 5 +++- xmrstak/cli/xmr-stak.manifest | 34 +++++++++++++++++++++++ xmrstak/misc/uac.cpp | 12 ++++++++ xmrstak/misc/uac.hpp | 1 + 6 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 xmrstak/cli/xmr-stak.manifest diff --git a/CMakeLists.txt b/CMakeLists.txt index 10f33bd..3b3c7eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ project(xmr-stak) -cmake_minimum_required(VERSION 3.1.0) +cmake_minimum_required(VERSION 3.4.0) # enforce C++11 set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -522,9 +522,11 @@ endif() file(GLOB SRCFILES_CPP "xmrstak/cli/*.cpp") set_source_files_properties(${SRCFILES_CPP} PROPERTIES LANGUAGE CXX) -add_executable(xmr-stak - ${SRCFILES_CPP} -) +if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_executable(xmr-stak ${SRCFILES_CPP} xmrstak/cli/xmr-stak.manifest) +else() + add_executable(xmr-stak ${SRCFILES_CPP}) +endif() set(EXECUTABLE_OUTPUT_PATH "bin") set(LIBRARY_OUTPUT_PATH "bin") diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 583deff..547b104 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -31,6 +31,7 @@ extern "C" #include "cryptonight.h" #include "cryptonight_aesni.h" 
#include "xmrstak/backend/cryptonight.hpp" +#include "xmrstak/misc/console.hpp" #include "xmrstak/jconf.hpp" #include #include @@ -178,6 +179,7 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) if(AddPrivilege(TEXT("SeLockMemoryPrivilege")) == 0) { + printer::inst()->print_msg(L0, "Elevating because we need to set up fast memory privileges."); RequestElevation(); if(AddLargePageRights()) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 4904604..0daa4e8 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -535,8 +535,11 @@ int main(int argc, char *argv[]) #ifdef _WIN32 /* For Windows 7 and 8 request elevation at all times unless we are using slow memory */ - if(jconf::inst()->GetSlowMemSetting() != jconf::slow_mem_cfg::always_use && LOBYTE(LOWORD(GetVersion())) < 10) + if(jconf::inst()->GetSlowMemSetting() != jconf::slow_mem_cfg::always_use && !IsWindows10OrNewer()) + { + printer::inst()->print_msg(L0, "Elevating due to Windows 7 or 8. 
You need Windows 10 to use fast memory without UAC elevation."); RequestElevation(); + } #endif if (!BackendConnector::self_test()) diff --git a/xmrstak/cli/xmr-stak.manifest b/xmrstak/cli/xmr-stak.manifest new file mode 100644 index 0000000..ed65c97 --- /dev/null +++ b/xmrstak/cli/xmr-stak.manifest @@ -0,0 +1,34 @@ + + + + XMR-Stak Monero Miner + + + + + + + + + + + + + + + + + + + + + + diff --git a/xmrstak/misc/uac.cpp b/xmrstak/misc/uac.cpp index 4fb5b0c..5e8d08a 100644 --- a/xmrstak/misc/uac.cpp +++ b/xmrstak/misc/uac.cpp @@ -64,4 +64,16 @@ VOID RequestElevation() SelfElevate(xmrstak::params::inst().minerArg0, xmrstak::params::inst().minerArgs); } + +BOOL IsWindows10OrNewer() +{ + OSVERSIONINFOEX osvi = { 0 }; + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); + osvi.dwMajorVersion = 10; + osvi.dwMinorVersion = 0; + DWORDLONG dwlConditionMask = 0; + VER_SET_CONDITION(dwlConditionMask, VER_MAJORVERSION, VER_GREATER_EQUAL); + VER_SET_CONDITION(dwlConditionMask, VER_MINORVERSION, VER_GREATER_EQUAL); + return ::VerifyVersionInfo(&osvi, VER_MAJORVERSION | VER_MINORVERSION, dwlConditionMask); +} #endif diff --git a/xmrstak/misc/uac.hpp b/xmrstak/misc/uac.hpp index 82cdf42..33c79ae 100644 --- a/xmrstak/misc/uac.hpp +++ b/xmrstak/misc/uac.hpp @@ -6,4 +6,5 @@ BOOL IsElevated(); BOOL SelfElevate(const std::string& my_path, const std::string& params); VOID RequestElevation(); +BOOL IsWindows10OrNewer(); #endif -- cgit v1.1 From bbd247a1599a9d5858e3b0f4a495736b87c86346 Mon Sep 17 00:00:00 2001 From: Aaron Date: Wed, 27 Dec 2017 15:26:45 -0600 Subject: Update compile_Windows.md fix minor spelling error --- doc/compile_Windows.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index c9a8ff7..f898867 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -22,7 +22,7 @@ ### Cuda 8.0+ (only needed to use NVIDIA GPUs) -- donwload and install 
[https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) +- download and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) - for minimal install choose `Custom installation options` during the install and select - CUDA/Develpment - CUDA/Visual Studio Integration (ignore the warning during the install that VS2017 is not supported) -- cgit v1.1 From ad2ce39c13de2c64e90da62a157afd1a4616e9ac Mon Sep 17 00:00:00 2001 From: Lehmax Date: Thu, 28 Dec 2017 01:37:04 +0100 Subject: Typo: Obtaning -> Obtaining --- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 547b104..88876a1 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -188,7 +188,7 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) bRebootDesirable = TRUE; } else - msg->warning = "Obtaning SeLockMemoryPrivilege failed."; + msg->warning = "Obtaining SeLockMemoryPrivilege failed."; return 0; } -- cgit v1.1 From df855746b4ff1a9b9896d67250ac7d4680227a9c Mon Sep 17 00:00:00 2001 From: Lehmax Date: Thu, 28 Dec 2017 19:35:05 +0100 Subject: Improved SeLockMemoryPrivilege part I removed typos, changed the wording and made the whole thing more noob-friendly. 
--- doc/FAQ.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/FAQ.md b/doc/FAQ.md index 23507f2..c27ee89 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -1,7 +1,7 @@ # FAQ ## Content Overview -* [SeLockMemoryPrivilege failed](#selockmemoryprivilege-failed) +* ["Obtaining SeLockMemoryPrivilege failed."](#obtaining-selockmemoryprivilege-failed) * [VirtualAlloc failed](#virtualalloc-failed) * [Error msvcp140.dll and vcruntime140.dll not available](#error-msvcp140dll-and-vcruntime140dll-not-available) * [Error: MEMORY ALLOC FAILED: mmap failed](#error-memory-alloc-failed-mmap-failed) @@ -9,23 +9,26 @@ * [Virus Protection Alert](#virus-protection-alert) * [Change Currency to Mine](#change-currency-to-mine) -## SeLockMemoryPrivilege failed +## "Obtaining SeLockMemoryPrivilege failed." Please see [config.txt](config.txt) under section **LARGE PAGE SUPPORT** -For Windows 7 pro, or Windows 8 and above see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx) (make sure to reboot afterwards!). +For professional versions of Windows see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx). +Make sure to reboot afterwards! -For Windows 7 Home : +For Windows 7/10 Home: -1) Download and install [Windows Server 2003 Resource Kit Tools](https://www.microsoft.com/en-us/download/details.aspx?id=17657). Ignore incompatiablity warning during installation. +1) Download and install [Windows Server 2003 Resource Kit Tools](https://www.microsoft.com/en-us/download/details.aspx?id=17657). Ignore any incompatibility warning during installation. -2) In cmd or power shell: `ntrights -u %USERNAME% +r SeLockMemoryPrivilege` (where %USERNAME% is the user that will be running the program. This command needs to be run as admin) +2) Open cmd or PowerShell as an administrator. -3) Reboot. +3) Use `ntrights -u %USERNAME% +r SeLockMemoryPrivilege` where %USERNAME% is the user that will be running the program. + +4) Reboot. 
Reference: http://rybkaforum.net/cgi-bin/rybkaforum/topic_show.pl?pid=259791#pid259791 -*Warning: do not download ntrights.exe from any other site other then the offical Microsoft download page.* +*Warning: Do not download ntrights.exe from any other site other than the official Microsoft download page.* ## VirtualAlloc failed -- cgit v1.1 From a406563d0104909aa4b5da84c99bdf4b90e62c0e Mon Sep 17 00:00:00 2001 From: Lehmax Date: Thu, 28 Dec 2017 19:40:37 +0100 Subject: Update FAQ.md --- doc/FAQ.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/FAQ.md b/doc/FAQ.md index c27ee89..641a50d 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -11,8 +11,6 @@ ## "Obtaining SeLockMemoryPrivilege failed." -Please see [config.txt](config.txt) under section **LARGE PAGE SUPPORT** - For professional versions of Windows see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx). Make sure to reboot afterwards! -- cgit v1.1 From be2974603f2443e65d830dce695b21bf6c0c59a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Gonz=C3=A1lez?= Date: Thu, 28 Dec 2017 20:34:06 +0100 Subject: install curl first it's not installed in ubuntu by default. 
--- doc/Linux_deployment.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/Linux_deployment.md b/doc/Linux_deployment.md index 323a97f..3219e8a 100644 --- a/doc/Linux_deployment.md +++ b/doc/Linux_deployment.md @@ -15,6 +15,7 @@ For automatic deployments, please use the steps above to obtain config.txt and u ``` #!/bin/bash +sudo apt install curl curl -O `curl -s https://api.github.com/repos/fireice-uk/xmr-stak/releases/latest | grep -o 'browser_download_url.*xmr-stak-portbin-linux.tar.gz' | sed 's/.*"//'` curl -O http://path.to/your/config.txt tar xzf xmr-stak-portbin-linux.tar.gz -- cgit v1.1 From caf839f4725ca84c8afc939f283a1c4377add934 Mon Sep 17 00:00:00 2001 From: Gene Date: Sat, 30 Dec 2017 04:50:06 -0500 Subject: added instructions for TinyCore Linux --- doc/compile_Linux.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md index b7104ac..92036c0 100644 --- a/doc/compile_Linux.md +++ b/doc/compile_Linux.md @@ -64,6 +64,33 @@ cd xmr-stak/build cmake .. make install + + # TinyCore Linux 8.x + # TinyCore is 32-bit only, but there is an x86-64 port, known as "Pure 64," + # hosted on the TinyCore home page, and it works well. + # Beware that huge page support is not enabled in the kernel distributed + # with Pure 64. Consider http://wiki.tinycorelinux.net/wiki:custom_kernel + # Note that as of yet there are no distro packages for microhttpd or hwloc. + # hwloc is easy enough to install manually though, shown below. + # Also note that only CPU mining has been tested on this platform, thus the + # disabling of CUDA and OpenCL shown below. 
+ tce-load -iw openssl-dev.tcz cmake.tcz make.tcz gcc.tcz git.tcz \ + glibc_base-dev.tcz linux-4.8.1_api_headers.tcz \ + glibc_add_lib.tcz + wget https://www.open-mpi.org/software/hwloc/v1.11/downloads/hwloc-1.11.8.tar.gz + tar xzvf hwloc-1.11.8.tar.gz + cd hwloc-1.11.8 + ./configure --prefix=/usr/local + make + sudo make install + git clone http://github.com/fireice-uk/xmr-stak + cd xmr-stak + mkdir build + cd build + CC=gcc cmake .. -DCUDA_ENABLE=OFF \ + -DOpenCL_ENABLE=OFF \ + -DMICROHTTPD_ENABLE=OFF + make install ``` - g++ version 5.1 or higher is required for full C++11 support. -- cgit v1.1 From 84826bd00e8b2e08443e3d97b7029f74d3a67846 Mon Sep 17 00:00:00 2001 From: Gene Date: Sat, 30 Dec 2017 05:29:46 -0500 Subject: Update compile_Linux.md --- doc/compile_Linux.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md index 92036c0..5a1e762 100644 --- a/doc/compile_Linux.md +++ b/doc/compile_Linux.md @@ -83,6 +83,7 @@ ./configure --prefix=/usr/local make sudo make install + cd .. git clone http://github.com/fireice-uk/xmr-stak cd xmr-stak mkdir build -- cgit v1.1 From 58db6082a33a1233eff0b33ce9fba9cc5a9f5de8 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 30 Dec 2017 21:16:35 +0100 Subject: differgence in OpenCL code remove branch differgences in AMD OpenCl code based on #454 a Please enter the commit message for your changes. 
Lines starting --- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 104 ++++++++++------------ 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 255fcbb..ec05712 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -653,21 +653,11 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u for(int i = 0; i < 25; ++i) states[i] = State[i]; - switch(State[0] & 3) - { - case 0: - Branch0[atomic_inc(Branch0 + Threads)] = get_global_id(0) - get_global_offset(0); - break; - case 1: - Branch1[atomic_inc(Branch1 + Threads)] = get_global_id(0) - get_global_offset(0); - break; - case 2: - Branch2[atomic_inc(Branch2 + Threads)] = get_global_id(0) - get_global_offset(0); - break; - case 3: - Branch3[atomic_inc(Branch3 + Threads)] = get_global_id(0) - get_global_offset(0); - break; - } + ulong StateSwitch = State[0] & 3; + __global uint *destinationBranch1 = StateSwitch == 0 ? Branch0 : Branch1; + __global uint *destinationBranch2 = StateSwitch == 2 ? Branch2 : Branch3; + __global uint *destinationBranch = StateSwitch < 2 ? destinationBranch1 : destinationBranch2; + destinationBranch[atomic_inc(destinationBranch + Threads)] = gIdx; } } mem_fence(CLK_GLOBAL_MEM_FENCE); @@ -704,8 +694,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u for(uint i = 0; i < 4; ++i) { - if(i < 3) t[0] += 0x40UL; - else t[0] += 0x08UL; + t[0] += i < 3 ? 0x40UL : 0x08UL; t[2] = t[0] ^ t[1]; @@ -715,8 +704,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u h = m ^ p; - if(i < 2) t[1] = 0x3000000000000000UL; - else t[1] = 0xB000000000000000UL; + t[1] = i < 2 ? 
0x3000000000000000UL : 0xB000000000000000UL; } t[0] = 0x08UL; @@ -744,6 +732,27 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u #define SWAP8(x) as_ulong(as_uchar8(x).s76543210) +#define JHXOR \ + h0h ^= input[0]; \ + h0l ^= input[1]; \ + h1h ^= input[2]; \ + h1l ^= input[3]; \ + h2h ^= input[4]; \ + h2l ^= input[5]; \ + h3h ^= input[6]; \ + h3l ^= input[7]; \ +\ + E8; \ +\ + h4h ^= input[0]; \ + h4l ^= input[1]; \ + h5h ^= input[2]; \ + h5l ^= input[3]; \ + h6h ^= input[4]; \ + h6l ^= input[5]; \ + h7h ^= input[6]; \ + h7l ^= input[7] + __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads) { const uint idx = get_global_id(0) - get_global_offset(0); @@ -757,46 +766,27 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint sph_u64 h4h = 0x754D2E7F8996A371UL, h4l = 0x62E27DF70849141DUL, h5h = 0x948F2476F7957627UL, h5l = 0x6C29804757B6D587UL, h6h = 0x6C0D8EAC2D275E5CUL, h6l = 0x0F7A0557C6508451UL, h7h = 0xEA12247067D3E47BUL, h7l = 0x69D71CD313ABE389UL; sph_u64 tmp; - for(int i = 0; i < 5; ++i) + for(int i = 0; i < 3; ++i) { ulong input[8]; - if(i < 3) - { - for(int x = 0; x < 8; ++x) input[x] = (states[(i << 3) + x]); - } - else if(i == 3) - { - input[0] = (states[24]); - input[1] = 0x80UL; - for(int x = 2; x < 8; ++x) input[x] = 0x00UL; - } - else - { - input[7] = 0x4006000000000000UL; - - for(int x = 0; x < 7; ++x) input[x] = 0x00UL; - } - - h0h ^= input[0]; - h0l ^= input[1]; - h1h ^= input[2]; - h1l ^= input[3]; - h2h ^= input[4]; - h2l ^= input[5]; - h3h ^= input[6]; - h3l ^= input[7]; - - E8; - - h4h ^= input[0]; - h4l ^= input[1]; - h5h ^= input[2]; - h5l ^= input[3]; - h6h ^= input[4]; - h6l ^= input[5]; - h7h ^= input[6]; - h7l ^= input[7]; + const int shifted = i << 3; + for(int x = 0; x < 8; ++x) input[x] = (states[shifted + x]); + JHXOR; + } + { + ulong input[8]; + input[0] = (states[24]); + input[1] = 0x80UL; + #pragma unroll 6 
+ for(int x = 2; x < 8; ++x) input[x] = 0x00UL; + JHXOR; + } + { + ulong input[8]; + for(int x = 0; x < 7; ++x) input[x] = 0x00UL; + input[7] = 0x4006000000000000UL; + JHXOR; } //output[0] = h6h; @@ -832,6 +822,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u ((uint8 *)h)[0] = vload8(0U, c_IV256); + #pragma unroll 4 for(uint i = 0, bitlen = 0; i < 4; ++i) { if(i < 3) @@ -907,6 +898,7 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global State[7] = 0x0001000000000000UL; + #pragma unroll 4 for(uint i = 0; i < 4; ++i) { ulong H[8], M[8]; -- cgit v1.1 From 16759bc35357e9e981544273c42125de443c18f3 Mon Sep 17 00:00:00 2001 From: Doug Johnson Date: Sat, 30 Dec 2017 16:49:51 -0700 Subject: Modify invalid result report to show GPU id --- xmrstak/backend/amd/minethd.cpp | 2 +- xmrstak/backend/nvidia/minethd.cpp | 2 +- xmrstak/misc/executor.cpp | 2 +- xmrstak/net/msgstruct.hpp | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index e83527c..85a48d3 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -245,7 +245,7 @@ void minethd::work_main() if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo), oWork.iPoolId)); else - executor::inst()->push_event(ex_event("AMD Invalid Result", oWork.iPoolId)); + executor::inst()->push_event(ex_event("AMD Invalid Result", pGpuCtx->deviceIdx, oWork.iPoolId)); } iCount += pGpuCtx->rawIntensity; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 5564596..d1e2eb4 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -287,7 +287,7 @@ void minethd::work_main() if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], 
bResult, iThreadNo), oWork.iPoolId)); else - executor::inst()->push_event(ex_event("NVIDIA Invalid Result", oWork.iPoolId)); + executor::inst()->push_event(ex_event("NVIDIA Invalid Result", ctx.device_id, oWork.iPoolId)); } iCount += h_per_round; diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index a3088a5..f0b6e0e 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -594,7 +594,7 @@ void executor::ex_main() break; case EV_GPU_RES_ERROR: - log_result_error(std::string(ev.oGpuError.error_str)); + log_result_error(std::string(ev.oGpuError.error_str + std::string(" GPU ID ") + std::to_string(ev.oGpuError.idx))); break; case EV_PERF_TICK: diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp index a5affc8..8c4bdbe 100644 --- a/xmrstak/net/msgstruct.hpp +++ b/xmrstak/net/msgstruct.hpp @@ -66,8 +66,9 @@ struct sock_err // Unlike socket errors, GPU errors are read-only strings struct gpu_res_err { + size_t idx; // GPU index const char* error_str; - gpu_res_err(const char* error_str) : error_str(error_str) {} + gpu_res_err(const char* error_str, size_t idx) : error_str(error_str), idx(idx) {} }; enum ex_event_name { EV_INVALID_VAL, EV_SOCK_READY, EV_SOCK_ERROR, EV_GPU_RES_ERROR, @@ -99,7 +100,7 @@ struct ex_event }; ex_event() { iName = EV_INVALID_VAL; iPoolId = 0;} - ex_event(const char* gpu_err, size_t id) : iName(EV_GPU_RES_ERROR), iPoolId(id), oGpuError(gpu_err) {} + ex_event(const char* gpu_err, size_t gpu_idx, size_t id) : iName(EV_GPU_RES_ERROR), iPoolId(id), oGpuError(gpu_err, gpu_idx) {} ex_event(std::string&& err, bool silent, size_t id) : iName(EV_SOCK_ERROR), iPoolId(id), oSocketError(std::move(err), silent) { } ex_event(job_result dat, size_t id) : iName(EV_MINER_HAVE_RESULT), iPoolId(id), oJobResult(dat) {} ex_event(pool_job dat, size_t id) : iName(EV_POOL_HAVE_JOB), iPoolId(id), oPoolJob(dat) {} -- cgit v1.1 From 0c845b3569f0a2c9524f98d4ca9b6866288fe3d0 Mon Sep 17 00:00:00 2001 From: Doug Johnson Date: 
Sat, 30 Dec 2017 23:59:03 -0700 Subject: Add warning and fallback when auto intensity is 0 Occasionally the auto adjust doesn't find enough memory and the intensity is detected too low and aligned to 0 with the compute units. This patch fixes this situation by issuing a warning with a suggestion to set environment vars and then ignoring the alignment to 0. Per several issues: Principally: https://github.com/fireice-uk/xmr-stak/issues/81 Related: https://github.com/fireice-uk/xmr-stak/issues/490 https://github.com/fireice-uk/xmr-stak/issues/472 --- xmrstak/backend/amd/autoAdjust.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 0bc5239..4673613 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -118,6 +118,19 @@ private: size_t possibleIntensity = std::min( maxThreads , maxIntensity ); // map intensity to a multiple of the compute unit count, 8 is the number of threads per work group size_t intensity = (possibleIntensity / (8 * ctx.computeUnits)) * ctx.computeUnits * 8; + //If the intensity is 0, then it's because the multiple of the unit count is greater than intensity + if (intensity == 0) { + /* See Issues: + * https://github.com/fireice-uk/xmr-stak/issues/81 + * https://github.com/fireice-uk/xmr-stak/issues/472 + * https://github.com/fireice-uk/xmr-stak/issues/490 + * Note that it appears that Northern Islands GPUs (HD 6XXX) are unaffected by + * these environment variables, according to my testing (dougvj) + */ + printer::inst()->print_msg(L0, "WARNING: Autodetected intensity unexpectedly low. 
Try setting GPU_SINGLE_ALLOC_PERCENT and etc."); + intensity = possibleIntensity; + + } conf += std::string(" // gpu: ") + ctx.name + " memory:" + std::to_string(availableMem / byteToMiB) + "\n"; conf += std::string(" // compute units: ") + std::to_string(ctx.computeUnits) + "\n"; // set 8 threads per block (this is a good value for the most gpus) -- cgit v1.1 From 1d60d43fbc24133d13321401c550785075c219e2 Mon Sep 17 00:00:00 2001 From: Vladimir Tamara Date: Mon, 1 Jan 2018 19:20:24 -0500 Subject: It compiles on OpenBSD/adJ 6.2 --- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 3 +++ xmrstak/backend/cpu/minethd.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 88876a1..1026b04 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -253,6 +253,9 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al #elif defined(__FreeBSD__) ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); +#elif defined(__OpenBSD__) + ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); #else ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 143b66f..717c928 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -84,7 +84,7 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id CPU_ZERO(&mn); CPU_SET(cpu_id, &mn); return pthread_setaffinity_np(h, sizeof(cpuset_t), &mn) == 0; -#else +#elif !defined(__OpenBSD__) cpu_set_t mn; CPU_ZERO(&mn); CPU_SET(cpu_id, &mn); -- cgit v1.1 From 
cb5ff03b32cfd1787a3933fd05ce05bfb2d92d28 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 3 Jan 2018 09:59:31 +0100 Subject: change psychocrypt's donation address --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 788a072..112ceeb 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ fireice-uk: psychocrypt: ``` -43NoJVEXo21hGZ6tDG6Z3g4qimiGdJPE6GRxAmiWwm26gwr62Lqo7zRiCJFSBmbkwTGNuuES9ES5TgaVHceuYc4Y75txCTU +45tcqnJMgd3VqeTznNotiNj4G9PQoK67TGRiHyj6EYSZ31NUbAfs9XdiU5squmZb717iHJLxZv3KfEw8jCYGL5wa19yrVCn ``` ## Release Checksums -- cgit v1.1 From aa556eafccd63d0cd4241fd4935d3f0504c4df3c Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 3 Jan 2018 20:58:38 +0100 Subject: remove warning remove warning `#pragma once in main file` --- xmrstak/misc/uac.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/xmrstak/misc/uac.cpp b/xmrstak/misc/uac.cpp index 5e8d08a..ad9d394 100644 --- a/xmrstak/misc/uac.cpp +++ b/xmrstak/misc/uac.cpp @@ -1,5 +1,3 @@ -#pragma once - #ifdef _WIN32 #include "xmrstak/misc/console.hpp" #include "xmrstak/params.hpp" -- cgit v1.1 From d01bab0cd73181353cbc8ae61ec5712b06fcb775 Mon Sep 17 00:00:00 2001 From: Brian Recchia Date: Tue, 2 Jan 2018 16:09:40 -0500 Subject: Update minethd.cpp Changed capitalization of "macOS" Squashed the commit --- README.md | 2 +- doc/compile.md | 4 ++-- doc/compile_MacOS.md | 31 ------------------------------- doc/compile_macOS.md | 31 +++++++++++++++++++++++++++++++ doc/usage.md | 2 +- xmrstak/backend/amd/minethd.cpp | 2 +- xmrstak/backend/cpu/minethd.cpp | 2 +- xmrstak/backend/nvidia/minethd.cpp | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) delete mode 100644 doc/compile_MacOS.md create mode 100644 doc/compile_macOS.md diff --git a/README.md b/README.md index 788a072..d6f857f 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ XMR-Stak is a universal Stratum pool miner. 
This miner supports CPUs, AMD and NV ## Features - support all common backends (CPU/x86, AMD-GPU and NVIDIA-GPU) -- support all common OS (Linux, Windows and MacOS) +- support all common OS (Linux, Windows and macOS) - supports algorithm cryptonight for Monero (XMR) and cryptonight-light (AEON) - easy to use - guided start (no need to edit a config file for the first start) diff --git a/doc/compile.md b/doc/compile.md index e97affa..771c9d1 100644 --- a/doc/compile.md +++ b/doc/compile.md @@ -9,7 +9,7 @@ * [Compile on Windows](compile_Windows.md) * [Compile on Linux](compile_Linux.md) * [Compile on FreeBSD](compile_FreeBSD.md) -* [Compile on MacOS](compile_MacOS.md) +* [Compile on macOS](compile_macOS.md) ## Build System @@ -31,7 +31,7 @@ After the configuration you need to compile the miner, follow the guide for your * [Compile in Windows](compile_Windows.md) * [Compile in Linux](compile_Linux.md) * [Compile in FreeBSD](compile_FreeBSD.md) -* [Compile in MacOS](compile_MacOS.md) +* [Compile in macOS](compile_macOS.md) ## Generic Build Options - `CMAKE_INSTALL_PREFIX` install miner to the home folder diff --git a/doc/compile_MacOS.md b/doc/compile_MacOS.md deleted file mode 100644 index 1b0af91..0000000 --- a/doc/compile_MacOS.md +++ /dev/null @@ -1,31 +0,0 @@ -# Compile **xmr-stak** for MacOS - -## Dependencies - -Assuming you already have [Homebrew](https://brew.sh) installed, the installation of dependencies is pretty straightforward and will generate the `xmr-stak` binary in the `bin/` directory. - -### For NVIDIA GPUs - -```shell -brew tap caskroom/drivers -brew cask install nvidia-cuda -brew install hwloc libmicrohttpd gcc openssl cmake -cmake . -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DOpenCL_ENABLE=OFF -make install -``` - -[All available CMake options](compile.md#nvidia-build-options) - -### For AMD GPUs - -> 🖐 We need help with AMD GPU compilation instructions. 
Please submit a PR if you managed to install [AMD APP SDK](http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/) and to compile `xmr-stak` on MacOS. - -### For CPU-only mining - -```shell -brew install hwloc libmicrohttpd gcc openssl cmake -cmake . -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF -make install -``` - -[All available CMake options](compile.md#cpu-build-options) diff --git a/doc/compile_macOS.md b/doc/compile_macOS.md new file mode 100644 index 0000000..6eb66b3 --- /dev/null +++ b/doc/compile_macOS.md @@ -0,0 +1,31 @@ +# Compile **xmr-stak** for macOS + +## Dependencies + +Assuming you already have [Homebrew](https://brew.sh) installed, the installation of dependencies is pretty straightforward and will generate the `xmr-stak` binary in the `bin/` directory. + +### For NVIDIA GPUs + +```shell +brew tap caskroom/drivers +brew cask install nvidia-cuda +brew install hwloc libmicrohttpd gcc openssl cmake +cmake . -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DOpenCL_ENABLE=OFF +make install +``` + +[All available CMake options](compile.md#nvidia-build-options) + +### For AMD GPUs + +> 🖐 We need help with AMD GPU compilation instructions. Please submit a PR if you managed to install [AMD APP SDK](http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/) and to compile `xmr-stak` on macOS. + +### For CPU-only mining + +```shell +brew install hwloc libmicrohttpd gcc openssl cmake +cmake . -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF +make install +``` + +[All available CMake options](compile.md#cpu-build-options) diff --git a/doc/usage.md b/doc/usage.md index 60cf69b..a810469 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -23,7 +23,7 @@ The number of files depends on the available backends. `set XMRSTAK_NOWAIT=1` disable the dialog `Press any key to exit.` for non UAC execution. 
-## Usage on Linux & MacOS +## Usage on Linux & macOS 1) Open a terminal within the folder with the binary 2) Start the miner with `./xmr-stak` diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index e83527c..0ee3f8e 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -139,7 +139,7 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor if(cfg.cpu_aff >= 0) { #if defined(__APPLE__) - printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); + printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory."); #endif printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff); diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 143b66f..48425e5 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -307,7 +307,7 @@ std::vector minethd::thread_starter(uint32_t threadOffset, miner_work if(cfg.iCpuAff >= 0) { #if defined(__APPLE__) - printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); + printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory."); #endif printer::inst()->print_msg(L1, "Starting %dx thread, affinity: %d.", cfg.iMultiway, (int)cfg.iCpuAff); diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 5564596..cc6ea24 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -166,7 +166,7 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor if(cfg.cpu_aff >= 0) { #if defined(__APPLE__) - printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); + printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory."); #endif printer::inst()->print_msg(L1, "Starting NVIDIA GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff); -- cgit v1.1 From 
78cf4351d9130b7ceac5dca700afdf742111941d Mon Sep 17 00:00:00 2001 From: ChaosMarc Date: Fri, 5 Jan 2018 12:08:12 +0100 Subject: #125: separate hashrate sums for each component --- xmrstak/misc/executor.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index f0b6e0e..680f045 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -761,6 +761,7 @@ void executor::hashrate_report(std::string& out) else out.append(1, '\n'); + double fTotalCur[3] = { 0.0, 0.0, 0.0}; for (i = 0; i < nthd; i++) { double fHps[3]; @@ -779,6 +780,10 @@ void executor::hashrate_report(std::string& out) fTotal[0] += fHps[0]; fTotal[1] += fHps[1]; fTotal[2] += fHps[2]; + + fTotalCur[0] += fHps[0]; + fTotalCur[1] += fHps[1]; + fTotalCur[2] += fHps[2]; if((i & 0x1) == 1) //Odd i's out.append("|\n"); @@ -786,21 +791,25 @@ void executor::hashrate_report(std::string& out) if((i & 0x1) == 1) //We had odd number of threads out.append("|\n"); - - if(nthd != 1) - out.append("-----------------------------------------------------\n"); - else - out.append("---------------------------\n"); + + out.append("Totals (").append(name).append("): "); + out.append(hps_format(fTotalCur[0], num, sizeof(num))); + out.append(hps_format(fTotalCur[1], num, sizeof(num))); + out.append(hps_format(fTotalCur[2], num, sizeof(num))); + out.append(" H/s\n"); + + out.append("-----------------------------------------------------------------\n"); } } - out.append("Totals: "); + out.append("Totals (ALL): "); out.append(hps_format(fTotal[0], num, sizeof(num))); out.append(hps_format(fTotal[1], num, sizeof(num))); out.append(hps_format(fTotal[2], num, sizeof(num))); out.append(" H/s\nHighest: "); out.append(hps_format(fHighestHps, num, sizeof(num))); out.append(" H/s\n"); + out.append("-----------------------------------------------------------------\n"); } char* time_format(char* buf, size_t len, 
std::chrono::system_clock::time_point time) -- cgit v1.1 From 7b3f2942d3dafa042fe5b99aec209b200a3071d9 Mon Sep 17 00:00:00 2001 From: ChaosMarc Date: Fri, 5 Jan 2018 12:30:25 +0100 Subject: display 0.0 instead of (na) for totals --- xmrstak/misc/executor.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 680f045..6d06411 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -776,14 +776,14 @@ void executor::hashrate_report(std::string& out) out.append(hps_format(fHps[0], num, sizeof(num))).append(" |"); out.append(hps_format(fHps[1], num, sizeof(num))).append(" |"); out.append(hps_format(fHps[2], num, sizeof(num))).append(1, ' '); - - fTotal[0] += fHps[0]; - fTotal[1] += fHps[1]; - fTotal[2] += fHps[2]; - fTotalCur[0] += fHps[0]; - fTotalCur[1] += fHps[1]; - fTotalCur[2] += fHps[2]; + fTotal[0] += (std::isnormal(fHps[0])) ? fHps[0] : 0.0; + fTotal[0] += (std::isnormal(fHps[1])) ? fHps[1] : 0.0; + fTotal[0] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; + + fTotalCur[0] += (std::isnormal(fHps[0])) ? fHps[0] : 0.0; + fTotalCur[0] += (std::isnormal(fHps[1])) ? fHps[1] : 0.0; + fTotalCur[0] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; if((i & 0x1) == 1) //Odd i's out.append("|\n"); -- cgit v1.1 From 6d21de7f7d38324e980255ed116d21f86ba740e5 Mon Sep 17 00:00:00 2001 From: ChaosMarc Date: Mon, 8 Jan 2018 10:14:43 +0100 Subject: fixed messed up array indexes --- xmrstak/misc/executor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 6d06411..055739b 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -778,12 +778,12 @@ void executor::hashrate_report(std::string& out) out.append(hps_format(fHps[2], num, sizeof(num))).append(1, ' '); fTotal[0] += (std::isnormal(fHps[0])) ? fHps[0] : 0.0; - fTotal[0] += (std::isnormal(fHps[1])) ? 
fHps[1] : 0.0; - fTotal[0] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; + fTotal[1] += (std::isnormal(fHps[1])) ? fHps[1] : 0.0; + fTotal[2] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; fTotalCur[0] += (std::isnormal(fHps[0])) ? fHps[0] : 0.0; - fTotalCur[0] += (std::isnormal(fHps[1])) ? fHps[1] : 0.0; - fTotalCur[0] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; + fTotalCur[1] += (std::isnormal(fHps[1])) ? fHps[1] : 0.0; + fTotalCur[2] += (std::isnormal(fHps[2])) ? fHps[2] : 0.0; if((i & 0x1) == 1) //Odd i's out.append("|\n"); -- cgit v1.1 From a45b38bf670d64aea464b2155f37baf0841ad9bc Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Mon, 8 Jan 2018 18:51:04 +0000 Subject: Fix bug in pool-side hashes --- xmrstak/misc/executor.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/xmrstak/misc/executor.hpp b/xmrstak/misc/executor.hpp index c2caa39..fbaa265 100644 --- a/xmrstak/misc/executor.hpp +++ b/xmrstak/misc/executor.hpp @@ -177,7 +177,6 @@ private: iPoolCallTimes.clear(); tPoolConnTime = std::chrono::system_clock::now(); iPoolHashes = 0; - iPoolDiff = 0; } double fHighestHps = 0.0; -- cgit v1.1 From 6a508ce409e2f634f81ed26a959aa88d0414d542 Mon Sep 17 00:00:00 2001 From: Tom Doemiller Date: Tue, 9 Jan 2018 13:57:52 +0000 Subject: Fix cache size detection --- xmrstak/backend/cpu/autoAdjust.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index 7bdb14e..b5575e9 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -142,7 +142,7 @@ private: } L3KB_size = ((get_masked(cpu_info[1], 31, 22) + 1) * (get_masked(cpu_info[1], 21, 12) + 1) * - (get_masked(cpu_info[1], 11, 0) + 1) * (cpu_info[2] + 1)) / halfHashMemSize; + (get_masked(cpu_info[1], 11, 0) + 1) * (cpu_info[2] + 1)) / 1024; return true; } -- cgit v1.1 From 054ee94f9efb8d0fda52e368c3d357dea47575da Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 9 Jan 2018 21:01:30 +0100 Subject: 
fix usage of bytes instead of KB bug was introduced with #67 - increase the L3 sanity check to 2GiB - fix usage of byte instead of KB --- xmrstak/backend/cpu/autoAdjust.hpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index b5575e9..db805ec 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -33,25 +33,21 @@ class autoAdjust { public: - size_t hashMemSize; - size_t halfHashMemSize; - - autoAdjust() + bool printConfig() { + size_t hashMemSizeKB; + size_t halfHashMemSizeKB; + if(::jconf::inst()->IsCurrencyMonero()) { - hashMemSize = MONERO_MEMORY; - halfHashMemSize = hashMemSize / 2u; + hashMemSizeKB = MONERO_MEMORY / 1024u; + halfHashMemSizeKB = hashMemSizeKB / 2u; } else { - hashMemSize = AEON_MEMORY; - halfHashMemSize = hashMemSize / 2u; + hashMemSizeKB = AEON_MEMORY / 1024u; + halfHashMemSizeKB = hashMemSizeKB / 2u; } - } - - bool printConfig() - { configEditor configTpl{}; @@ -63,9 +59,10 @@ public: std::string conf; - if(!detectL3Size() || L3KB_size < halfHashMemSize || L3KB_size > (halfHashMemSize * 100u)) + + if(!detectL3Size() || L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048u)) { - if(L3KB_size < halfHashMemSize || L3KB_size > (halfHashMemSize * 100)) + if(L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048)) printer::inst()->print_msg(L0, "Autoconf failed: L3 size sanity check failed - %u KB.", L3KB_size); conf += std::string(" { \"low_power_mode\" : false, \"no_prefetch\" : true, \"affine_to_cpu\" : false },\n"); @@ -88,7 +85,7 @@ public: if(L3KB_size <= 0) break; - double_mode = L3KB_size / hashMemSize > (int32_t)(corecnt-i); + double_mode = L3KB_size / hashMemSizeKB > (int32_t)(corecnt-i); conf += std::string(" { \"low_power_mode\" : "); conf += std::string(double_mode ? 
"true" : "false"); @@ -107,9 +104,9 @@ public: aff_id++; if(double_mode) - L3KB_size -= hashMemSize * 2u; + L3KB_size -= hashMemSizeKB * 2u; else - L3KB_size -= hashMemSize; + L3KB_size -= hashMemSizeKB; } } -- cgit v1.1 From eb4967b5bff3a909796e1783f18e579639becde7 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 9 Jan 2018 21:37:06 +0100 Subject: update VEGA names for auto suggestion update VEGA names --- xmrstak/backend/amd/autoAdjust.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 0bc5239..511a712 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -101,7 +101,16 @@ private: * sowing down the memory performance because of TLB cache misses */ size_t maxThreads = 1000u; - if(ctx.name.compare("gfx901") == 0) + if( + ctx.name.compare("gfx901") == 0 || + ctx.name.compare("gfx904") == 0 || + // APU + ctx.name.compare("gfx902") == 0 || + // UNKNOWN + ctx.name.compare("gfx900") == 0 || + ctx.name.compare("gfx903") == 0 || + ctx.name.compare("gfx905") == 0 + ) { /* Increase the number of threads for AMD VEGA gpus. * Limit the number of threads based on the issue: https://github.com/fireice-uk/xmr-stak/issues/5#issuecomment-339425089 -- cgit v1.1 From 91a2dccb4f63ceea149cc74d36fea5a19681304d Mon Sep 17 00:00:00 2001 From: Michael Hohl Date: Wed, 10 Jan 2018 20:31:15 +0100 Subject: AMD compile instructions for macOS (#811) * compile howto on amd mac tested on the latest 15" MacBook Pro * Add a note about OpenCL/Xcode --- doc/compile_macOS.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/compile_macOS.md b/doc/compile_macOS.md index 6eb66b3..46f1d5b 100644 --- a/doc/compile_macOS.md +++ b/doc/compile_macOS.md @@ -18,7 +18,13 @@ make install ### For AMD GPUs -> 🖐 We need help with AMD GPU compilation instructions. 
Please submit a PR if you managed to install [AMD APP SDK](http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/) and to compile `xmr-stak` on macOS. +OpenCL is bundled with Xcode, so no other dependency than the basic ones is needed. Just enable OpenCL via the `-DOpenCL_ENABLE=ON` CMake option. + +```shell +brew install hwloc libmicrohttpd gcc openssl cmake +cmake . -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DCUDA_ENABLE=OFF -DOpenCL_ENABLE=ON +make install +``` ### For CPU-only mining -- cgit v1.1 From a1bd6c2a576111d78c67fba6fd0aa16e1d68fc63 Mon Sep 17 00:00:00 2001 From: morganamilo Date: Thu, 11 Jan 2018 06:32:56 +0000 Subject: Add --needed to pacman install Only install if the user doesn't already have the packages installed, no point in reinstalling everything. --- doc/compile_Linux.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md index b7104ac..e314dd5 100644 --- a/doc/compile_Linux.md +++ b/doc/compile_Linux.md @@ -25,7 +25,7 @@ make install # Arch - sudo pacman -S base-devel hwloc openssl cmake libmicrohttpd + sudo pacman -S --needed base-devel hwloc openssl cmake libmicrohttpd git clone https://github.com/fireice-uk/xmr-stak.git mkdir xmr-stak/build cd xmr-stak/build -- cgit v1.1 From 14f60635915f545fce2f61117ccf87143c7629cc Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 13 Jan 2018 20:27:02 +0100 Subject: ignore gpu with intensity zero - if the intensity is zero then do not suggest a config - remove the links to old issues --- xmrstak/backend/amd/autoAdjust.hpp | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 4673613..c16edac 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -94,7 +94,6 @@ private: } std::string conf; - int i = 0; for(auto& ctx : devVec) { /* 1000 is a magic selected limit, the reason 
is that more than 2GiB memory @@ -119,26 +118,26 @@ private: // map intensity to a multiple of the compute unit count, 8 is the number of threads per work group size_t intensity = (possibleIntensity / (8 * ctx.computeUnits)) * ctx.computeUnits * 8; //If the intensity is 0, then it's because the multiple of the unit count is greater than intensity - if (intensity == 0) { - /* See Issues: - * https://github.com/fireice-uk/xmr-stak/issues/81 - * https://github.com/fireice-uk/xmr-stak/issues/472 - * https://github.com/fireice-uk/xmr-stak/issues/490 - * Note that it appears that Northern Islands GPUs (HD 6XXX) are unaffected by - * these environment variables, according to my testing (dougvj) - */ - printer::inst()->print_msg(L0, "WARNING: Autodetected intensity unexpectedly low. Try setting GPU_SINGLE_ALLOC_PERCENT and etc."); + if (intensity == 0) + { + printer::inst()->print_msg(L0, "WARNING: Auto detected intensity unexpectedly low. Try to set the environment variable GPU_SINGLE_ALLOC_PERCENT."); intensity = possibleIntensity; } - conf += std::string(" // gpu: ") + ctx.name + " memory:" + std::to_string(availableMem / byteToMiB) + "\n"; - conf += std::string(" // compute units: ") + std::to_string(ctx.computeUnits) + "\n"; - // set 8 threads per block (this is a good value for the most gpus) - conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + - " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : true\n" - " },\n"; - ++i; + if (intensity != 0) + { + conf += std::string(" // gpu: ") + ctx.name + " memory:" + std::to_string(availableMem / byteToMiB) + "\n"; + conf += std::string(" // compute units: ") + std::to_string(ctx.computeUnits) + "\n"; + // set 8 threads per block (this is a good value for the most gpus) + conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + + " \"intensity\" : " + 
std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + + " \"affine_to_cpu\" : false, \"strided_index\" : true\n" + " },\n"; + } + else + { + printer::inst()->print_msg(L0, "WARNING: Ignore gpu %s, %s MiB free memory is not enough to suggest settings.", ctx.name.c_str(), std::to_string(availableMem / byteToMiB).c_str()); + } } configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex)); -- cgit v1.1 From 6e59483f82edd09a8f90751d832e7c2794e42c67 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 13 Jan 2018 21:23:59 +0100 Subject: fix set affinity for windows Ignore any affinity >=64 and throw a warning. --- xmrstak/backend/cpu/minethd.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 48425e5..f30d1fe 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -73,7 +73,16 @@ namespace cpu bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id) { #if defined(_WIN32) - return SetThreadAffinityMask(h, 1ULL << cpu_id) != 0; + // we can only pin up to 64 threads + if(cpu_id < 64) + { + return SetThreadAffinityMask(h, 1ULL << cpu_id) != 0; + } + else + { + printer::inst()->print_msg(L0, "WARNING: Windows supports only affinity up to 63."); + return false; + } #elif defined(__APPLE__) thread_port_t mach_thread; thread_affinity_policy_data_t policy = { static_cast(cpu_id) }; -- cgit v1.1 From 05b98280e323b694db5d1fa583f19be8248df211 Mon Sep 17 00:00:00 2001 From: dam-ien <4492999+dam-ien@users.noreply.github.com> Date: Mon, 15 Jan 2018 18:21:05 -0500 Subject: Fix for compiling with Cuda 9.1 on VS 2017 version >= 15.5 Fix for #750, #626, #606, #534, without having to downgrade and do not require modification of host_config.h Require an additional component during VS2017 installation (VC++ 2017 version 15.4 v14.11 toolset) and an additional command execution before compilation. 
--- doc/compile_Windows.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index c9a8ff7..adf99cb 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -13,6 +13,7 @@ - during the install chose the components - `Desktop development with C++` (left side) - `VC++ 2015.3 v140 toolset for desktop` (right side) + - Since release of VS2017 15.5 (12/04/17), require `VC++ 2017 version 15.4 v14.11 toolset` (under tab `Individual Components`, section `Compilers, build tools, and runtimes`), as CUDA 9.1 is not compatible with compiler 14.12.X ### CMake for Win64 @@ -80,6 +81,8 @@ - `cd` to your unzipped source code directory - execute the following commands (NOTE: path to VS2017 can be different) ``` + # Next line is only if compiling for Cuda 9.1 and using Visual Studio 2017 >= 15.5 (released 12/04/17) + "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=14.11 "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsMSBuildCmd.bat" set CMAKE_PREFIX_PATH=C:\xmr-stak-dep\hwloc;C:\xmr-stak-dep\libmicrohttpd;C:\xmr-stak-dep\openssl mkdir build -- cgit v1.1 From 4f109d0335d96e9688c6c31ec738ebfc87937384 Mon Sep 17 00:00:00 2001 From: Vladimir Tamara Date: Tue, 16 Jan 2018 22:59:42 -0500 Subject: Extra code path for OpenBSD suggested by psychocrypt --- xmrstak/backend/cpu/minethd.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 717c928..b41e4a9 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -84,7 +84,9 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id CPU_ZERO(&mn); CPU_SET(cpu_id, &mn); return pthread_setaffinity_np(h, sizeof(cpuset_t), &mn) == 0; -#elif !defined(__OpenBSD__) +#elif defined(__OpenBSD__) + printer::inst()->print_msg(L0,"WARNING: thread pinning is not 
supported under OPENBSD."); +#else cpu_set_t mn; CPU_ZERO(&mn); CPU_SET(cpu_id, &mn); -- cgit v1.1 From 520c502dc2da0525ba8885223628fb8040c786b0 Mon Sep 17 00:00:00 2001 From: SeaDude Date: Tue, 16 Jan 2018 22:33:57 -0800 Subject: Simplified compile instructions for noobs There are a lot of hiccups on this step in the AEON Slack Channel. --- doc/compile_Windows.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index f898867..970c8ab 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -75,10 +75,11 @@ ## Compile -- download and unzip `xmr-stak` +- download `xmr-stak` [Source Code.zip](https://github.com/fireice-uk/xmr-stak/releases) and save to a location in your `User` folder +- extract the Source Code.zip file - open the command line terminal `cmd` -- `cd` to your unzipped source code directory -- execute the following commands (NOTE: path to VS2017 can be different) +- `cd` to the extracted source code directory +- execute the following commands (NOTE: path to Visual Studio 2017 Community can be different) ``` "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsMSBuildCmd.bat" set CMAKE_PREFIX_PATH=C:\xmr-stak-dep\hwloc;C:\xmr-stak-dep\libmicrohttpd;C:\xmr-stak-dep\openssl -- cgit v1.1 From 8a3b6984cd35d64cfd22db83d13690b2e62eb5f0 Mon Sep 17 00:00:00 2001 From: SeaDude Date: Tue, 16 Jan 2018 23:22:08 -0800 Subject: Added Mercenary's suggestion --- doc/compile_Windows.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index 970c8ab..3b236be 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -75,7 +75,7 @@ ## Compile -- download `xmr-stak` [Source Code.zip](https://github.com/fireice-uk/xmr-stak/releases) and save to a location in your `User` folder +- download `xmr-stak` [Source Code.zip](https://github.com/fireice-uk/xmr-stak/releases) and save to a location in 
your home folder (C:\Users\USERNAME\) - extract the Source Code.zip file - open the command line terminal `cmd` - `cd` to the extracted source code directory @@ -86,6 +86,8 @@ mkdir build cd build ``` + - Sometimes Windows will `cd` to `C:\Users\USERNAME\source\build\` instead of `C:\Users\USERNAME\xmr-stak-\build`. Ensure you are in the correct `build` directory before proceeding. + - for CUDA 8* ``` cmake -G "Visual Studio 15 2017 Win64" -T v140,host=x64 .. -- cgit v1.1 From dc429fede1bb2f85c5ea47d9a3db8cf06833f824 Mon Sep 17 00:00:00 2001 From: denis-roy Date: Sat, 20 Jan 2018 16:34:06 -0500 Subject: Fixed Minor Typo CUDA/Develpment -> CUDA/Development --- doc/compile_Windows.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index c9a8ff7..246a2db 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -24,7 +24,7 @@ - donwload and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) - for minimal install choose `Custom installation options` during the install and select - - CUDA/Develpment + - CUDA/Development - CUDA/Visual Studio Integration (ignore the warning during the install that VS2017 is not supported) - CUDA/Runtime - Driver components -- cgit v1.1 From ea6c147ab201a703c9eec1f5cd03fde670ded4d9 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 22 Jan 2018 21:02:05 +0100 Subject: reduce memory usage for low end gpus reduce memory usage to 1GiB for NVIDIA devices with <=6 SMX --- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index d865e13..d5d0039 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -441,6 +441,12 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) maxMemUsage = size_t(1024u) * byteToMiB; } + 
if(props.multiProcessorCount <= 6) + { + // limit memory usage for low end devices to reduce the number of threads + maxMemUsage = size_t(1024u) * byteToMiB; + } + int* tmp; cudaError_t err; // a device must be selected to get the right memory usage later on -- cgit v1.1 From 617af4b301582e1373a94c0f34bad754a1f5bc76 Mon Sep 17 00:00:00 2001 From: Grant Galitz Date: Sun, 7 Jan 2018 23:41:34 -0500 Subject: Optimize Skein - Eliminate modulus math (It runs slow inside microcode). - Convert whatever the hell was going on into a rotate op. Like... Someone kinda reinvented the wheel in order to do a simple rotate. --- xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl | 78 +++++++++++++++--------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl index 868757b..bebc2ab 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl @@ -22,68 +22,59 @@ static const __constant ulong SKEIN512_256_IV[8] = 0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL }; -#define SKEIN_INJECT_KEY(p, s) do { \ +#define SKEIN_INJECT_KEY(p, s, q) do { \ p += h; \ - p.s5 += t[s % 3]; \ - p.s6 += t[(s + 1) % 3]; \ - p.s7 += s; \ + p.s5 += t[s]; \ + p.s6 += t[select(s + 1U, 0U, s == 2U)]; \ + p.s7 += q; \ } while(0) -ulong SKEIN_ROT(const uint2 x, const uint y) -{ - if(y < 32) return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); - else return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); -} - -void SkeinMix8(ulong4 *pv0, ulong4 *pv1, const uint rc0, const uint rc1, const uint rc2, const uint rc3) +void SkeinMix8(ulong4 *pv0, ulong4 *pv1, const ulong4 rc) { *pv0 += *pv1; - (*pv1).s0 = SKEIN_ROT(as_uint2((*pv1).s0), rc0); - (*pv1).s1 = SKEIN_ROT(as_uint2((*pv1).s1), rc1); - (*pv1).s2 = SKEIN_ROT(as_uint2((*pv1).s2), rc2); - (*pv1).s3 = SKEIN_ROT(as_uint2((*pv1).s3), rc3); + *pv1 = rotate(*pv1, (ulong4)rc); *pv1 ^= *pv0; } -ulong8 
SkeinEvenRound(ulong8 p, const ulong8 h, const ulong *t, const uint s) +ulong8 SkeinEvenRound(ulong8 p, const ulong8 h, const ulong *t, const uint s, const uint q) { - SKEIN_INJECT_KEY(p, s); + SKEIN_INJECT_KEY(p, s, q); ulong4 pv0 = p.even, pv1 = p.odd; - SkeinMix8(&pv0, &pv1, 46, 36, 19, 37); + SkeinMix8(&pv0, &pv1, (ulong4)(46, 36, 19, 37)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 33, 27, 14, 42); + SkeinMix8(&pv0, &pv1, (ulong4)(33, 27, 14, 42)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 17, 49, 36, 39); + SkeinMix8(&pv0, &pv1, (ulong4)(17, 49, 36, 39)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 44, 9, 54, 56); + SkeinMix8(&pv0, &pv1, (ulong4)(44, 9, 54, 56)); return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5))); } -ulong8 SkeinOddRound(ulong8 p, const ulong8 h, const ulong *t, const uint s) +ulong8 SkeinOddRound(ulong8 p, const ulong8 h, const ulong *t, const uint s, const uint q) { - SKEIN_INJECT_KEY(p, s); + SKEIN_INJECT_KEY(p, s, q); ulong4 pv0 = p.even, pv1 = p.odd; - SkeinMix8(&pv0, &pv1, 39, 30, 34, 24); + SkeinMix8(&pv0, &pv1, (ulong4)(39, 30, 34, 24)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 13, 50, 10, 17); + SkeinMix8(&pv0, &pv1, (ulong4)(13, 50, 10, 17)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 25, 29, 39, 43); + SkeinMix8(&pv0, &pv1, (ulong4)(25, 29, 39, 43)); pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); - SkeinMix8(&pv0, &pv1, 8, 35, 56, 22); + SkeinMix8(&pv0, &pv1, (ulong4)(8, 35, 56, 22)); return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5))); } @@ -92,20 +83,47 @@ ulong8 Skein512Block(ulong8 p, ulong8 h, ulong h8, const ulong *t) #pragma unroll for(int i = 0; i 
< 18; ++i) { - p = SkeinEvenRound(p, h, t, i); + p = SkeinEvenRound(p, h, t, 0U, i); ++i; ulong tmp = h.s0; h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); h.s7 = h8; h8 = tmp; - p = SkeinOddRound(p, h, t, i); + p = SkeinOddRound(p, h, t, 1U, i); + ++i; + tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + p = SkeinEvenRound(p, h, t, 2U, i); + ++i; + tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + p = SkeinOddRound(p, h, t, 0U, i); + ++i; + tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + p = SkeinEvenRound(p, h, t, 1U, i); + ++i; + tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + p = SkeinOddRound(p, h, t, 2U, i); tmp = h.s0; h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); h.s7 = h8; h8 = tmp; } - SKEIN_INJECT_KEY(p, 18); + p += h; + p.s5 += t[0]; + p.s6 += t[1]; + p.s7 += 18; return(p); } -- cgit v1.1 From b9fb744a104cc8eceb9196a676bea2c4f0e14d51 Mon Sep 17 00:00:00 2001 From: Grant Galitz Date: Sun, 7 Jan 2018 23:45:41 -0500 Subject: author update --- xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl | 1 + 1 file changed, 1 insertion(+) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl index bebc2ab..e2a867d 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl @@ -3,6 +3,7 @@ R"===( #define WOLF_SKEIN_CL // Vectorized Skein implementation macros and functions by Wolf +// Updated by taisel #define SKEIN_KS_PARITY 0x1BD11BDAA9FC1A22 -- cgit v1.1 From ac474caa8b9392881736ddaae349d4cb649683a9 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 24 Jan 2018 22:08:48 +0100 Subject: remove usage of `rotate` revert the change that the OpenCl function `rotate` is used instead of `SKEIN_ROT` --- xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl | 11 ++++++++++- 1 file changed, 10 
insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl index e2a867d..279b652 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl @@ -30,10 +30,19 @@ static const __constant ulong SKEIN512_256_IV[8] = p.s7 += q; \ } while(0) +ulong SKEIN_ROT(const uint2 x, const uint y) +{ + if(y < 32) return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); + else return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); +} + void SkeinMix8(ulong4 *pv0, ulong4 *pv1, const ulong4 rc) { *pv0 += *pv1; - *pv1 = rotate(*pv1, (ulong4)rc); + (*pv1).s0 = SKEIN_ROT(as_uint2((*pv1).s0), rc.s0); + (*pv1).s1 = SKEIN_ROT(as_uint2((*pv1).s1), rc.s1); + (*pv1).s2 = SKEIN_ROT(as_uint2((*pv1).s2), rc.s2); + (*pv1).s3 = SKEIN_ROT(as_uint2((*pv1).s3), rc.s3); *pv1 ^= *pv0; } -- cgit v1.1 From c7d136fe308cfb39c8fe05c6ee615f579238b2ab Mon Sep 17 00:00:00 2001 From: Andre Mueller <23385321+7022Andre@users.noreply.github.com> Date: Wed, 24 Jan 2018 15:55:14 -0800 Subject: Update compile process Added/Rewrote instructions on how to compile in Windows; Fixed some typos and syntax; Added build option section --- doc/compile_Windows.md | 88 ++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index 940e0c3..e32906c 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -4,54 +4,54 @@ ### Preparation -- open a command line `cmd` -- run `mkdir C:\xmr-stak-dep` +- Open a command line (Windows key + r) and enter `cmd` +- Execute `mkdir C:\xmr-stak-dep` -### Visual Studio 2017 Community +### Visual Studio Community 2017 -- download VS2017 Community and install from [https://www.visualstudio.com/downloads/](https://www.visualstudio.com/downloads/) -- during the install chose the components +- Download and install [Visual Studio Community 
2017](https://www.visualstudio.com/downloads/) +- During install choose following components: - `Desktop development with C++` (left side) - - `VC++ 2015.3 v140 toolset for desktop` (right side) + - `VC++ 2015.3 v140 toolset for desktop` (right side - **NOT** needed for CUDA 9 or AMD GPU) - Since release of VS2017 15.5 (12/04/17), require `VC++ 2017 version 15.4 v14.11 toolset` (under tab `Individual Components`, section `Compilers, build tools, and runtimes`), as CUDA 9.1 is not compatible with compiler 14.12.X ### CMake for Win64 -- download and install the latest version from [https://cmake.org/download/](https://cmake.org/download/) -- tested version: [cmake 3.9](https://cmake.org/files/v3.9/cmake-3.9.0-rc3-win64-x64.msi) -- during the install choose the option `Add CMake to the system PATH for all users` +- Download and install latest version from https://cmake.org/download/ +- Tested version: [cmake 3.9](https://cmake.org/files/v3.9/cmake-3.9.0-rc3-win64-x64.msi) +- During install choose option: `Add CMake to the system PATH for all users` -### Cuda 8.0+ (only needed to use NVIDIA GPUs) +### Cuda 8.0+ (only needed for NVIDIA GPUs) -- download and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) -- for minimal install choose `Custom installation options` during the install and select +- Download and install https://developer.nvidia.com/cuda-downloads +- For minimal install choose `Custom installation options` during the install and select - CUDA/Develpment - CUDA/Visual Studio Integration (ignore the warning during the install that VS2017 is not supported) - CUDA/Runtime - Driver components -### AMD APP SDK 3.0 (only needed to use AMD GPUs) +### AMD APP SDK 3.0 (only needed for AMD GPUs) -- download and install the latest version from [http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/](http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/) +- Download and install the latest 
version from http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/ ### Dependencies OpenSSL/Hwloc and Microhttpd -- for CUDA 8*: - - download the version 1 of the precompiled binary from [https://github.com/fireice-uk/xmr-stak-dep/releases/download/v1/xmr-stak-dep.zip](https://github.com/fireice-uk/xmr-stak-dep/releases/download/v1/xmr-stak-dep.zip) - - version 1 of the pre-compiled dependencies is not compatible with Visual Studio Toolset v141 -- for CUDA 9 **and/or** AMD GPUs, CPU: - - download the version 2 of the precompiled binary from [https://github.com/fireice-uk/xmr-stak-dep/releases/download/v2/xmr-stak-dep.zip](https://github.com/fireice-uk/xmr-stak-dep/releases/download/v2/xmr-stak-dep.zip) - - version 2 of the pre-compiled dependencies is not compatible with Visual Studio Toolset v140 -- unzip all to `C:\xmr-stak-dep` +- For CUDA 8*: + - Download version 1 of the precompiled binary from https://github.com/fireice-uk/xmr-stak-dep/releases/download/v1/xmr-stak-dep.zip + - Version 1 of the pre-compiled dependencies is not compatible with Visual Studio Toolset v141 +- For CUDA 9* **and/or** AMD GPUs, CPU: + - Download version 2 of the precompiled binary from https://github.com/fireice-uk/xmr-stak-dep/releases/download/v2/xmr-stak-dep.zip + - Version 2 of the pre-compiled dependencies is not compatible with Visual Studio Toolset v140 +- Extract archive to `C:\xmr-stak-dep` ### Validate the Dependency Folder -- open a command line `cmd` -- run +- Open a command line (Windows key + r) and enter `cmd` +- Execute ``` cd c:\xmr-stak-dep tree . ``` -- the result should have the same structure +- You should see something like this: ``` C:\xmr-stak-dep>tree . 
Folder PATH listing for volume Windows @@ -76,34 +76,36 @@ ## Compile -- download `xmr-stak` [Source Code.zip](https://github.com/fireice-uk/xmr-stak/releases) and save to a location in your home folder (C:\Users\USERNAME\) -- extract the Source Code.zip file -- open the command line terminal `cmd` -- `cd` to the extracted source code directory -- execute the following commands (NOTE: path to Visual Studio 2017 Community can be different) +- Download xmr-stak [Source Code.zip](https://github.com/fireice-uk/xmr-stak/releases) and save to a location in your home folder (C:\Users\USERNAME\) +- Extract `Source Code.zip` (e.g. to `C:\Users\USERNAME\xmr-stak-`) +- Open a command line (Windows key + r) and enter `cmd` +- Go to extracted source code directory (e.g. `cd C:\Users\USERNAME\xmr-stak-`) +- Execute the following commands (NOTE: path to Visual Studio Community 2017 can be different) ``` - # Next line is only if compiling for Cuda 9.1 and using Visual Studio 2017 >= 15.5 (released 12/04/17) + # Execute next line only if compiling for Cuda 9.1 and using Visual Studio 2017 >= 15.5 (released 12/04/17) "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=14.11 + "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsMSBuildCmd.bat" + set CMAKE_PREFIX_PATH=C:\xmr-stak-dep\hwloc;C:\xmr-stak-dep\libmicrohttpd;C:\xmr-stak-dep\openssl + ``` +- Sometimes Windows will change the directory to `C:\Users\USERNAME\source\` instead of `C:\Users\USERNAME\xmr-stak-\`. If that's the case execute `cd C:\Users\USERNAME\xmr-stak-` followed by: + ``` mkdir build + cd build ``` - - Sometimes Windows will `cd` to `C:\Users\USERNAME\source\build\` instead of `C:\Users\USERNAME\xmr-stak-\build`. Ensure you are in the correct `build` directory before proceeding. - - - for CUDA 8* - ``` - cmake -G "Visual Studio 15 2017 Win64" -T v140,host=x64 .. 
- ``` - - for CUDA 9 **and/or** AMD GPUs, CPU - ``` - cmake -G "Visual Studio 15 2017 Win64" -T v141,host=x64 .. - ``` +### CMake + +- See [build options](https://github.com/fireice-uk/xmr-stak/blob/master/doc/compile.md#build-system) to enable or disable dependencies. +- For CUDA 8* execute: `cmake -G "Visual Studio 15 2017 Win64" -T v140,host=x64 ..` +- For CUDA 9* **and/or** AMD GPUs, CPU execute: `cmake -G "Visual Studio 15 2017 Win64" -T v141,host=x64 ..` +- Then execute ``` cmake --build . --config Release --target install + cd bin\Release + copy C:\xmr-stak-dep\openssl\bin\* . ``` - -\* Miner is also compiled for AMD GPUs (if the AMD APP SDK is installed) and CPUs. -CUDA 8 requires a downgrade to the old v140 tool chain. +- Miner is by default compiled for NVIDIA GPUs (if CUDA is installed), AMD GPUs (if the AMD APP SDK is installed) and CPUs. -- cgit v1.1 From e191229ac1b190da99209eebb2858ace94d27df9 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 27 Jan 2018 21:05:45 +0100 Subject: void nonce overlappping The cpu miner backend uses the wrong ranges of nonces instead of using `[startNonce,startNonce + nonce_chunk)` (startNonce,startNonce + nonce_chunk]` is used. This will results in an overlap with nonces used by the gpu back-ends. 
--- xmrstak/backend/cpu/minethd.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 1d9165e..cef4f8e 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -448,12 +448,13 @@ void minethd::work_main() globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk); } - *piNonce = ++result.iNonce; + *piNonce = result.iNonce; hash_fun(oWork.bWorkBlob, oWork.iWorkSize, result.bResult, ctx); if (*piHashVal < oWork.iTarget) executor::inst()->push_event(ex_event(result, oWork.iPoolId)); + result.iNonce++; std::this_thread::yield(); } @@ -637,7 +638,7 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi) } for (size_t i = 0; i < N; i++) - *piNonce[i] = ++iNonce; + *piNonce[i] = iNonce++; hash_fun_multi(bWorkBlob, oWork.iWorkSize, bHashOut, ctx); @@ -645,7 +646,7 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi) { if (*piHashVal[i] < oWork.iTarget) { - executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce - N + 1 + i, bHashOut + 32 * i, iThreadNo), oWork.iPoolId)); + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce - N + i, bHashOut + 32 * i, iThreadNo), oWork.iPoolId)); } } -- cgit v1.1 From 2bc5a055e1416a852b23eb33ac7ad0a0d96d8de5 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 30 Jan 2018 20:32:09 +0100 Subject: improve AMD auto suggestion for AEON increase the intensity limit for AEON --- xmrstak/backend/amd/autoAdjust.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 93b71ba..afedb5c 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -117,6 +117,9 @@ private: */ maxThreads = 2024u; } + // increase all intensity limits by two for aeon + if(!::jconf::inst()->IsCurrencyMonero()) + maxThreads *= 2u; // keep 128MiB memory free 
(value is randomly chosen) size_t availableMem = ctx.freeMem - (128u * byteToMiB); -- cgit v1.1 From c528f51a767a8e988dc03be080094dc979107499 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 30 Jan 2018 20:47:56 +0100 Subject: speedup Volta - enable L1 cache for Nvidia Volta GPUs and newer - remove explicit cache controll for Volta GPU and newer This pull request increases the hash rate for Volta GPUs by ~5% --- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 12 ++++++++++++ xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 8 +++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 15a6f36..cc97274 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m template< typename T > __device__ __forceinline__ T loadGlobal64( T * const addr ) { +#if (__CUDA_ARCH__ < 700) T x; asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) ); return x; +#else + return *addr; +#endif } template< typename T > __device__ __forceinline__ T loadGlobal32( T * const addr ) { +#if (__CUDA_ARCH__ < 700) T x; asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) ); return x; +#else + return *addr; +#endif } template< typename T > __device__ __forceinline__ void storeGlobal32( T* addr, T const & val ) { +#if (__CUDA_ARCH__ < 700) asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) ); +#else + *addr = val; +#endif } template diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index d5d0039..92259db 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) break; }; - 
CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); + const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1]; + + /* Disable L1 cache for GPUs before Volta. + * L1 speed is increased and latency reduced with Volta. + */ + if(gpuArch < 70) + CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); size_t hashMemSize; if(::jconf::inst()->IsCurrencyMonero()) -- cgit v1.1 From 1ea14c8d23d8cbbb97ecf99b0a7673a031151ebe Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 30 Jan 2018 21:33:30 +0100 Subject: fix output of gpu name fix that the GPU name is printed before the name is querried from OpenCL --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index d9bc962..c39c567 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -518,13 +518,13 @@ std::vector getAMDDevices(int index) printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k); continue; } - printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory ctx.deviceIdx = k; ctx.freeMem = std::min(ctx.freeMem, maxMem); ctx.name = std::string(devNameVec.data()); ctx.DeviceID = device_list[k]; + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); ctxVec.push_back(ctx); } } -- cgit v1.1 From cb9d7a43ba3ddc75a57259d9626f2f5742efb181 Mon Sep 17 00:00:00 2001 From: Andre <23385321+7022Andre@users.noreply.github.com> Date: Wed, 31 Jan 2018 09:08:57 -0800 Subject: Move section (build folder) up Moved section about Windows changing the working directory higher. 
--- doc/compile_Windows.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index a512373..129596c 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -86,15 +86,16 @@ "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=14.11 "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsMSBuildCmd.bat" - - set CMAKE_PREFIX_PATH=C:\xmr-stak-dep\hwloc;C:\xmr-stak-dep\libmicrohttpd;C:\xmr-stak-dep\openssl ``` - Sometimes Windows will change the directory to `C:\Users\USERNAME\source\` instead of `C:\Users\USERNAME\xmr-stak-\`. If that's the case execute `cd C:\Users\USERNAME\xmr-stak-` followed by: ``` mkdir build cd build + + set CMAKE_PREFIX_PATH=C:\xmr-stak-dep\hwloc;C:\xmr-stak-dep\libmicrohttpd;C:\xmr-stak-dep\openssl ``` + ### CMake - See [build options](https://github.com/fireice-uk/xmr-stak/blob/master/doc/compile.md#build-system) to enable or disable dependencies. -- cgit v1.1 From fc9d30b56a2f4171e8437444fc4f9d1f21b8a386 Mon Sep 17 00:00:00 2001 From: Steve Swinsburg Date: Thu, 1 Feb 2018 14:16:59 +1100 Subject: Update FAQ.md Add info about sysctl.conf so that the settings are preserved. --- doc/FAQ.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/FAQ.md b/doc/FAQ.md index 23507f2..0a4367c 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -40,13 +40,18 @@ Download and install this [runtime package](https://go.microsoft.com/fwlink/?Lin ## Error: MEMORY ALLOC FAILED: mmap failed -On Linux you will need to configure large page support `sudo sysctl -w vm.nr_hugepages=128` and increase your -ulimit -l. To do this you need to add following lines to /etc/security/limits.conf: +On Linux you will need to configure large page support and increase your ulimit -l. 
+ +To set large page support, add the following lines to /etc/sysctl.conf: + + vm.nr_hugepages=128 + +To increase the ulimit, add following lines to /etc/security/limits.conf: * soft memlock 262144 * hard memlock 262144 -Save file. You WILL need to log out and log back in for these settings to take affect on your user (no need to reboot, just relogin in your session). +You WILL need to log out and log back in for these settings to take affect on your user (no need to reboot, just relogin in your session). You can also do it Windows-style and simply run-as-root, but this is NOT recommended for security reasons. -- cgit v1.1 From 064804bd17be1216dba42fa55c820294c5e763a3 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 1 Feb 2018 20:37:50 +1030 Subject: Fix Disabling AMD GPUs The AMD jconf.cpp would only accept an array. The config sample, and the nvidia and cpu config work with the value 'null', as they accept a 'kNullType'. This means at current, AMD GPUs could not be turned off, the config file wouldn't parse on load. This change makes AMD consistent with the others, and can now be disabled. 
--- xmrstak/backend/amd/jconf.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index 07afb19..f126342 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -56,9 +56,10 @@ struct configVal { Type iType; }; -//Same order as in configEnum, as per comment above +// Same order as in configEnum, as per comment above +// kNullType means any type configVal oConfigValues[] = { - { aGpuThreadsConf, "gpu_threads_conf", kArrayType }, + { aGpuThreadsConf, "gpu_threads_conf", kNullType }, { iPlatformIdx, "platform_index", kNumberType } }; @@ -68,6 +69,8 @@ inline bool checkType(Type have, Type want) { if(want == have) return true; + else if(want == kNullType) + return true; else if(want == kTrueType && have == kFalseType) return true; else if(want == kFalseType && have == kTrueType) -- cgit v1.1 From 03a6f9c8ff4c5fcc04c9cdeedf6511fc292fb86b Mon Sep 17 00:00:00 2001 From: Jurn Ho Date: Sat, 3 Feb 2018 17:14:13 +0000 Subject: Update compile_Linux.md --- doc/compile_Linux.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md index b7104ac..a3031be 100644 --- a/doc/compile_Linux.md +++ b/doc/compile_Linux.md @@ -8,7 +8,7 @@ ### Cuda 8.0+ (only needed to use NVIDIA GPUs) -- donwload and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) +- download and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) - for minimal install choose `Custom installation options` during the install and select - CUDA/Develpment - CUDA/Runtime -- cgit v1.1 From 40e40b557a58094b9c16d0cadacf94c3bf6a6658 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Sat, 10 Feb 2018 21:56:10 +0000 Subject: Add rig identifier --- xmrstak/cli/cli-miner.cpp | 49 +++++++++++++++++++++++++++++++++++++++++------ xmrstak/config.tpl | 1 + xmrstak/jconf.cpp | 8 
+++++--- xmrstak/jconf.hpp | 1 + xmrstak/misc/executor.cpp | 15 ++++++++------- xmrstak/net/jpsock.cpp | 8 ++++---- xmrstak/net/jpsock.hpp | 3 ++- xmrstak/params.hpp | 2 ++ 8 files changed, 66 insertions(+), 21 deletions(-) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 7dc0f2f..29c56b2 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -95,6 +95,7 @@ void help() cout<<" -o, --url URL pool url and port, e.g. pool.usxmrpool.com:3333"<print_msg(L0, "Pool address has to be set if you want to specify rigid."); + win_exit(); + return 1; + } + + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for parameter '-r/--rigid' given"); + win_exit(); + return 1; + } + + params::inst().userSetRigid = true; + params::inst().poolRigid = argv[i]; + } else if(opName.compare("--use-nicehash") == 0) { params::inst().nicehashMode = true; diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl index 2c7bd41..451ea7b 100644 --- a/xmrstak/config.tpl +++ b/xmrstak/config.tpl @@ -2,6 +2,7 @@ R"===( /* * pool_address - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported. * wallet_address - Your wallet, or pool login. + * rig_id - Rig identifier for pool-side statistics (needs pool support). * pool_password - Can be empty in most cases or "x". * use_nicehash - Limit the nonce to 3 bytes as required by nicehash. * use_tls - This option will make us connect using Transport Layer Security. 
diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index a1db451..c9d3a20 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -129,12 +129,13 @@ bool jconf::GetPoolConfig(size_t id, pool_cfg& cfg) return false; typedef const Value* cval; - cval jaddr, jlogin, jpasswd, jnicehash, jtls, jtlsfp, jwt; + cval jaddr, jlogin, jrigid, jpasswd, jnicehash, jtls, jtlsfp, jwt; const Value& oThdConf = prv->configValues[aPoolList]->GetArray()[id]; /* We already checked presence and types */ jaddr = GetObjectMember(oThdConf, "pool_address"); jlogin = GetObjectMember(oThdConf, "wallet_address"); + jrigid = GetObjectMember(oThdConf, "rig_id"); jpasswd = GetObjectMember(oThdConf, "pool_password"); jnicehash = GetObjectMember(oThdConf, "use_nicehash"); jtls = GetObjectMember(oThdConf, "use_tls"); @@ -143,6 +144,7 @@ bool jconf::GetPoolConfig(size_t id, pool_cfg& cfg) cfg.sPoolAddr = jaddr->GetString(); cfg.sWalletAddr = jlogin->GetString(); + cfg.sRigId = jrigid->GetString(); cfg.sPasswd = jpasswd->GetString(); cfg.nicehash = jnicehash->GetBool(); cfg.tls = jtls->GetBool(); @@ -420,8 +422,8 @@ bool jconf::parse_config(const char* sFilename) std::vector pool_weights; pool_weights.reserve(pool_cnt); - const char* aPoolValues[] = { "pool_address", "wallet_address", "pool_password", "use_nicehash", "use_tls", "tls_fingerprint", "pool_weight" }; - Type poolValTypes[] = { kStringType, kStringType, kStringType, kTrueType, kTrueType, kStringType, kNumberType }; + const char* aPoolValues[] = { "pool_address", "wallet_address", "rig_id", "pool_password", "use_nicehash", "use_tls", "tls_fingerprint", "pool_weight" }; + Type poolValTypes[] = { kStringType, kStringType, kStringType, kStringType, kTrueType, kTrueType, kStringType, kNumberType }; constexpr size_t pvcnt = sizeof(aPoolValues)/sizeof(aPoolValues[0]); for(uint32_t i=0; i < pool_cnt; i++) diff --git a/xmrstak/jconf.hpp b/xmrstak/jconf.hpp index df1bf79..9a4e958 100644 --- a/xmrstak/jconf.hpp +++ b/xmrstak/jconf.hpp @@ -23,6 +23,7 
@@ public: struct pool_cfg { const char* sPoolAddr; const char* sWalletAddr; + const char* sRigId; const char* sPasswd; bool nicehash; bool tls; diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 055739b..c4ba26e 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -518,13 +518,14 @@ void executor::ex_main() already_have_cli_pool = true; const char* wallet = params.poolUsername.empty() ? cfg.sWalletAddr : params.poolUsername.c_str(); + const char* rigid = params.userSetRigid ? params.poolRigid.c_str() : cfg.sRigId; const char* pwd = params.userSetPwd ? params.poolPasswd.c_str() : cfg.sPasswd; bool nicehash = cfg.nicehash || params.nicehashMode; - pools.emplace_back(i+1, cfg.sPoolAddr, wallet, pwd, 9.9, false, params.poolUseTls, cfg.tls_fingerprint, nicehash); + pools.emplace_back(i+1, cfg.sPoolAddr, wallet, rigid, pwd, 9.9, false, params.poolUseTls, cfg.tls_fingerprint, nicehash); } else - pools.emplace_back(i+1, cfg.sPoolAddr, cfg.sWalletAddr, cfg.sPasswd, cfg.weight, false, cfg.tls, cfg.tls_fingerprint, cfg.nicehash); + pools.emplace_back(i+1, cfg.sPoolAddr, cfg.sWalletAddr, cfg.sRigId, cfg.sPasswd, cfg.weight, false, cfg.tls, cfg.tls_fingerprint, cfg.nicehash); } if(!xmrstak::params::inst().poolURL.empty() && !already_have_cli_pool) @@ -536,22 +537,22 @@ void executor::ex_main() win_exit(); } - pools.emplace_back(i+1, params.poolURL.c_str(), params.poolUsername.c_str(), params.poolPasswd.c_str(), 9.9, false, params.poolUseTls, "", params.nicehashMode); + pools.emplace_back(i+1, params.poolURL.c_str(), params.poolUsername.c_str(), params.poolRigid.c_str(), params.poolPasswd.c_str(), 9.9, false, params.poolUseTls, "", params.nicehashMode); } if(jconf::inst()->IsCurrencyMonero()) { if(dev_tls) - pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", 0.0, true, true, "", false); + pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false); else - pools.emplace_front(0, 
"donate.xmr-stak.net:3333", "", "", 0.0, true, false, "", false); + pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false); } else { if(dev_tls) - pools.emplace_front(0, "donate.xmr-stak.net:7777", "", "", 0.0, true, true, "", true); + pools.emplace_front(0, "donate.xmr-stak.net:7777", "", "", "", 0.0, true, true, "", true); else - pools.emplace_front(0, "donate.xmr-stak.net:4444", "", "", 0.0, true, false, "", true); + pools.emplace_front(0, "donate.xmr-stak.net:4444", "", "", "", 0.0, true, false, "", true); } ex_event ev; diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp index 7ee09e7..9c413dc 100644 --- a/xmrstak/net/jpsock.cpp +++ b/xmrstak/net/jpsock.cpp @@ -92,8 +92,8 @@ struct jpsock::opq_json_val opq_json_val(const Value* val) : val(val) {} }; -jpsock::jpsock(size_t id, const char* sAddr, const char* sLogin, const char* sPassword, double pool_weight, bool dev_pool, bool tls, const char* tls_fp, bool nicehash) : - net_addr(sAddr), usr_login(sLogin), usr_pass(sPassword), tls_fp(tls_fp), pool_id(id), pool_weight(pool_weight), pool(dev_pool), nicehash(nicehash), +jpsock::jpsock(size_t id, const char* sAddr, const char* sLogin, const char* sRigId, const char* sPassword, double pool_weight, bool dev_pool, bool tls, const char* tls_fp, bool nicehash) : + net_addr(sAddr), usr_login(sLogin), usr_rigid(sRigId), usr_pass(sPassword), tls_fp(tls_fp), pool_id(id), pool_weight(pool_weight), pool(dev_pool), nicehash(nicehash), connect_time(0), connect_attempts(0), disconnect_time(0), quiet_close(false) { sock_init(); @@ -523,8 +523,8 @@ bool jpsock::cmd_login() { char cmd_buffer[1024]; - snprintf(cmd_buffer, sizeof(cmd_buffer), "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\",\"agent\":\"%s\"},\"id\":1}\n", - usr_login.c_str(), usr_pass.c_str(), get_version_str().c_str()); + snprintf(cmd_buffer, sizeof(cmd_buffer), 
"{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\",\"rigid\":\"%s\",\"agent\":\"%s\"},\"id\":1}\n", + usr_login.c_str(), usr_pass.c_str(), usr_rigid.c_str(), get_version_str().c_str()); opq_json_val oResult(nullptr); diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp index 9d276b7..d9e5542 100644 --- a/xmrstak/net/jpsock.hpp +++ b/xmrstak/net/jpsock.hpp @@ -27,7 +27,7 @@ class base_socket; class jpsock { public: - jpsock(size_t id, const char* sAddr, const char* sLogin, const char* sPassword, double pool_weight, bool dev_pool, bool tls, const char* tls_fp, bool nicehash); + jpsock(size_t id, const char* sAddr, const char* sLogin, const char* sRigId, const char* sPassword, double pool_weight, bool dev_pool, bool tls, const char* tls_fp, bool nicehash); ~jpsock(); bool connect(std::string& sConnectError); @@ -82,6 +82,7 @@ public: private: std::string net_addr; std::string usr_login; + std::string usr_rigid; std::string usr_pass; std::string tls_fp; diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp index d0d6685..bed3427 100644 --- a/xmrstak/params.hpp +++ b/xmrstak/params.hpp @@ -28,6 +28,8 @@ struct params std::string poolURL; bool userSetPwd = false; std::string poolPasswd; + bool userSetRigid = false; + std::string poolRigid; std::string poolUsername; bool nicehashMode = false; -- cgit v1.1 From 116ecfca0c07bd4d76eb09b20003f82205c2e81c Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 11 Feb 2018 19:35:56 +0100 Subject: disable OSX CI test Currently the CI fails due to the issue ``` Error: homebrew/science was deprecated. This tap is now empty as all its formulae were migrated. The command "if [ $TRAVIS_OS_NAME = osx ]; then brew update; brew tap homebrew/science; fi" failed and exited with 1 during . ``` Until the fix is implemented the OSX tests will be disabled. 
--- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0d2d51a..4d53d48 100644 --- a/.travis.yml +++ b/.travis.yml @@ -70,10 +70,10 @@ matrix: - CMAKE_C_COMPILER=gcc-7 - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" - - os: osx - compiler: gcc - env: - - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" +# - os: osx +# compiler: gcc +# env: +# - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" before_install: - . CI/checkPRBranch -- cgit v1.1 From 89726d83a5487bdceb40d446e883f555f3990456 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Fri, 9 Feb 2018 16:53:03 +0000 Subject: Add video guide --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index f0fa131..bdcc6cf 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,11 @@ XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NV ## HTML reports +## Video setup guide on Windows + +[](https://www.youtube.com/watch?v=m9XFoQvLH8Y) +###### Video by Crypto Sewer + ## Overview * [Features](#features) * [Supported altcoins](#supported-altcoins) -- cgit v1.1 From b0d03b3302549b27866bb978d495c4051bc50371 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 14 Feb 2018 21:21:51 +0100 Subject: AMD: reduce register usage reduce usage of registers: based on the suggestion of @enerc77 --- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index ec05712..c0b6529 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -399,7 +399,7 @@ static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x void AESExpandKey256(uint *keybuf) { //#pragma unroll 4 - for(uint c = 8, i = 1; c < 60; ++c) + for(uint c = 8, i = 1; c < 
40; ++c) { // For 256-bit keys, an sbox permutation is done every other 4th uint generated, AND every 8th uint t = ((!(c & 7)) || ((c & 7) == 4)) ? SubWord(keybuf[c - 1]) : keybuf[c - 1]; @@ -421,7 +421,7 @@ __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads) { ulong State[25]; - uint ExpandedKey1[256]; + uint ExpandedKey1[40]; __local uint AES0[256], AES1[256], AES2[256], AES3[256]; uint4 text; @@ -578,7 +578,7 @@ __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads) { __local uint AES0[256], AES1[256], AES2[256], AES3[256]; - uint ExpandedKey2[256]; + uint ExpandedKey2[40]; ulong State[25]; uint4 text; @@ -632,7 +632,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u { text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; - #pragma unroll + #pragma unroll 10 for(int j = 0; j < 10; ++j) text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); } -- cgit v1.1 From 30024ee8a0945c833adf2a2c57e0a78aa3d8d1d0 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 17 Feb 2018 20:35:14 +0100 Subject: fix missing message in log file initialize the log file as fast as possible after the start of the miner --- xmrstak/cli/cli-miner.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 29c56b2..9053844 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -627,6 +627,9 @@ int main(int argc, char *argv[]) } #endif + if(strlen(jconf::inst()->GetOutputFile()) != 0) + printer::inst()->open_logfile(jconf::inst()->GetOutputFile()); + if (!BackendConnector::self_test()) { win_exit(); @@ -672,9 +675,6 @@ int main(int argc, char *argv[]) else 
printer::inst()->print_msg(L0,"Start mining: AEON"); - if(strlen(jconf::inst()->GetOutputFile()) != 0) - printer::inst()->open_logfile(jconf::inst()->GetOutputFile()); - executor::inst()->ex_start(jconf::inst()->DaemonMode()); uint64_t lastTime = get_timestamp_ms(); -- cgit v1.1 From 737185ee82bae05953680b1f4c4cdf8646c51b5a Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 17 Feb 2018 20:51:55 +0100 Subject: AMD: `mem_chunk` and new `strided_index` - add new option for `strided_index` - add additional option if `strided_index == 2` to control the memory chunk width --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 11 +++++-- xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 + xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 35 +++++++++++++++++------ xmrstak/backend/amd/autoAdjust.hpp | 2 +- xmrstak/backend/amd/config.tpl | 11 +++++-- xmrstak/backend/amd/jconf.cpp | 32 +++++++++++++++++---- xmrstak/backend/amd/jconf.hpp | 3 +- xmrstak/backend/amd/minethd.cpp | 1 + 8 files changed, 75 insertions(+), 21 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index c39c567..054ffc4 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -332,8 +332,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[256]; snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex ?
1 : 0); + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d", + hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk)); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) { @@ -696,6 +696,13 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) for(int i = 0; i < num_gpus; ++i) { + if(ctx[i].stridedIndex == 2 && (ctx[i].rawIntensity % ctx[i].workSize) != 0) + { + size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize; + ctx[i].rawIntensity = reduced_intensity; + printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity)); + } + if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) { return ret; diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index c17bac1..abfad5c 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -25,6 +25,7 @@ struct GpuContext size_t rawIntensity; size_t workSize; int stridedIndex; + int memChunk; /*Output vars*/ cl_device_id DeviceID; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index ec05712..2514092 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -411,12 +411,23 @@ void AESExpandKey256(uint *keybuf) } } +#define MEM_CHUNK (1<<4) + #if(STRIDED_INDEX==0) # define IDX(x) (x) -#else +#elif(STRIDED_INDEX==1) # define IDX(x) ((x) * (Threads)) +#elif(STRIDED_INDEX==2) +# define IDX(x) (((x) % MEM_CHUNK) + ((x) / MEM_CHUNK) * WORKSIZE * MEM_CHUNK) #endif +inline ulong getIdx() +{ +#if(STRIDED_INDEX==0 || STRIDED_INDEX==1 || STRIDED_INDEX==2) + return get_global_id(0) - get_global_offset(0); +#endif +} + 
__attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads) { @@ -425,7 +436,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul __local uint AES0[256], AES1[256], AES2[256], AES3[256]; uint4 text; - const ulong gIdx = get_global_id(0) - get_global_offset(0); + const ulong gIdx = getIdx(); for(int i = get_local_id(1) * WORKSIZE + get_local_id(0); i < 256; @@ -439,7 +450,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } barrier(CLK_LOCAL_MEM_FENCE); - + // do not use early return here if(gIdx < Threads) { @@ -447,8 +458,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul #if(STRIDED_INDEX==0) Scratchpad += gIdx * (ITERATIONS >> 2); -#else +#elif(STRIDED_INDEX==1) Scratchpad += gIdx; +#elif(STRIDED_INDEX==2) + Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif ((ulong8 *)State)[0] = vload8(0, input); @@ -509,7 +522,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre ulong a[2], b[2]; __local uint AES0[256], AES1[256], AES2[256], AES3[256]; - const ulong gIdx = get_global_id(0) - get_global_offset(0); + const ulong gIdx = getIdx(); for(int i = get_local_id(0); i < 256; i += WORKSIZE) { @@ -523,15 +536,17 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre barrier(CLK_LOCAL_MEM_FENCE); uint4 b_x; - + // do not use early return here if(gIdx < Threads) { states += 25 * gIdx; #if(STRIDED_INDEX==0) Scratchpad += gIdx * (ITERATIONS >> 2); -#else +#elif(STRIDED_INDEX==1) Scratchpad += gIdx; +#elif(STRIDED_INDEX==2) + Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif a[0] = states[0] ^ states[4]; @@ -582,7 +597,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u ulong 
State[25]; uint4 text; - const ulong gIdx = get_global_id(0) - get_global_offset(0); + const ulong gIdx = getIdx(); for(int i = get_local_id(1) * WORKSIZE + get_local_id(0); i < 256; @@ -603,8 +618,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u states += 25 * gIdx; #if(STRIDED_INDEX==0) Scratchpad += gIdx * (ITERATIONS >> 2); -#else +#elif(STRIDED_INDEX==1) Scratchpad += gIdx; +#elif(STRIDED_INDEX==2) + Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif #if defined(__Tahiti__) || defined(__Pitcairn__) diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index afedb5c..b88d3ee 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -143,7 +143,7 @@ private: // set 8 threads per block (this is a good value for the most gpus) conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : true\n" + " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 4\n" " },\n"; } else diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 25b75a1..8914130 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -6,11 +6,16 @@ R"===( * worksize - Number of local GPU threads (nothing to do with CPU threads) * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. 
* strided_index - switch memory pattern used for the scratch pad memory - * true = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks - * false = use a contiguous block of memory per thread + * 2 = chunked memory, chunk size is controlled by 'mem_chunk' + * required: intensity must be a multiple of worksize + * 1 or true = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks + * 0 or false = use a contiguous block of memory per thread + * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk + * this value is only used if 'strided_index' == 2 + * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) * "gpu_threads_conf" : * [ - * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true }, + * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 4 }, * ], * If you do not wish to mine with your AMD GPU(s) then use: * "gpu_threads_conf" : diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index f126342..22381e1 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -106,14 +106,15 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *idx, *intensity, *w_size, *aff, *stridedIndex; + const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk; idx = GetObjectMember(oThdConf, "index"); intensity = GetObjectMember(oThdConf, "intensity"); w_size = GetObjectMember(oThdConf, "worksize"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); stridedIndex = GetObjectMember(oThdConf, "strided_index"); + memChunk = GetObjectMember(oThdConf, "mem_chunk"); - if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || stridedIndex == nullptr) + if(idx == nullptr || intensity == 
nullptr || w_size == nullptr || aff == nullptr || stridedIndex == nullptr || memChunk == nullptr) return false; if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) @@ -122,13 +123,34 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) if(!aff->IsUint64() && !aff->IsBool()) return false; - if(!stridedIndex->IsBool()) + if(!stridedIndex->IsBool() && !stridedIndex->IsNumber()) + { + printer::inst()->print_msg(L0, "ERROR: strided_index must be a bool or a number"); + return false; + } + + if(stridedIndex->IsBool()) + cfg.stridedIndex = stridedIndex->GetBool() ? 1 : 0; + else + cfg.stridedIndex = (int)stridedIndex->GetInt64(); + + if(cfg.stridedIndex > 2) + { + printer::inst()->print_msg(L0, "ERROR: strided_index must be smaller than 2"); return false; + } + + cfg.memChunk = (int)memChunk->GetInt64(); + + if(!idx->IsUint64() || cfg.memChunk > 18 ) + { + printer::inst()->print_msg(L0, "ERROR: mem_chunk must be smaller than 18"); + return false; + } cfg.index = idx->GetUint64(); - cfg.intensity = intensity->GetUint64(); cfg.w_size = w_size->GetUint64(); - cfg.stridedIndex = stridedIndex->GetBool(); + cfg.intensity = intensity->GetUint64(); if(aff->IsNumber()) cfg.cpu_aff = aff->GetInt64(); diff --git a/xmrstak/backend/amd/jconf.hpp b/xmrstak/backend/amd/jconf.hpp index ee1882a..91e5d0d 100644 --- a/xmrstak/backend/amd/jconf.hpp +++ b/xmrstak/backend/amd/jconf.hpp @@ -26,7 +26,8 @@ public: size_t intensity; size_t w_size; long long cpu_aff; - bool stridedIndex; + int stridedIndex; + int memChunk; }; size_t GetThreadCount(); diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 422c28c..ca5e163 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -97,6 +97,7 @@ bool minethd::init_gpus() vGpuData[i].rawIntensity = cfg.intensity; vGpuData[i].workSize = cfg.w_size; vGpuData[i].stridedIndex = cfg.stridedIndex; + vGpuData[i].memChunk = cfg.memChunk; } return InitOpenCL(vGpuData.data(), n, 
jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; -- cgit v1.1 From 7b3929dbeff5254a77dcd2c6be89324a11adf4c8 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 19 Feb 2018 21:52:18 +0100 Subject: add Mesa OpenCL support allow usage of Mesa OpenCL --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 24 ++++++++++++++++++++--- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 5 +++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 054ffc4..af20dce 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -549,6 +549,8 @@ int getAMDPlatformIdx() clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); int platformIndex = -1; + // Mesa OpenCL is the fallback if no AMD or Apple OpenCL is found + int mesaPlatform = -1; if(clStatus == CL_SUCCESS) { @@ -559,13 +561,29 @@ int getAMDPlatformIdx() clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos || platformName.find("Apple") != std::string::npos) + if( platformName.find("Advanced Micro Devices") != std::string::npos || + platformName.find("Apple") != std::string::npos || + platformName.find("Mesa") != std::string::npos + ) { - platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); - break; + if(platformName.find("Mesa") != std::string::npos) + mesaPlatform = i; + else + { + // exit if AMD or Apple platform is found + platformIndex = i; + break; + } } } + // fall back to Mesa OpenCL + if(platformIndex == -1 && mesaPlatform != -1) + { + printer::inst()->print_msg(L0,"No AMD platform found select Mesa as OpenCL platform"); + platformIndex = mesaPlatform; + } } else printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.",
err_to_str(clStatus)); diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 53299ec..9ff5bf7 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -14,6 +14,11 @@ R"===( * along with this program. If not, see . */ +/* For Mesa clover support */ +#ifdef cl_clang_storage_class_specifiers +# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable +#endif + #ifdef cl_amd_media_ops #pragma OPENCL EXTENSION cl_amd_media_ops : enable #else -- cgit v1.1 From cff6b6cbfbb3da44d85753885466de5122e20472 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 12 Feb 2018 20:39:49 +0100 Subject: add OpenCL compatibility mode - add new option `comp_mode` to the amd config - disable `if guards` within opencl kernel if `comp_mode : false` --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 17 +++++++++++------ xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 + xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 19 ++++++++++++++++--- xmrstak/backend/amd/autoAdjust.hpp | 3 ++- xmrstak/backend/amd/config.tpl | 12 ++++++++---- xmrstak/backend/amd/jconf.cpp | 10 ++++++++-- xmrstak/backend/amd/jconf.hpp | 1 + xmrstak/backend/amd/minethd.cpp | 1 + 8 files changed, 48 insertions(+), 16 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 054ffc4..2f16b67 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -332,8 +332,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[256]; snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk)); + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d -DCOMP_MODE=%d", + hasIterations, threadMemMask, 
int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) { @@ -873,10 +873,15 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) size_t g_intensity = ctx->rawIntensity; size_t w_size = ctx->workSize; - // round up to next multiple of w_size - size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; - // number of global threads must be a multiple of the work group size (w_size) - assert(g_thd%w_size == 0); + size_t g_thd = g_intensity; + + if(ctx->compMode) + { + // round up to next multiple of w_size + size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + // number of global threads must be a multiple of the work group size (w_size) + assert(g_thd%w_size == 0); + } for(int i = 2; i < 6; ++i) { diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index abfad5c..8fb7168 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -26,6 +26,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + int compMode; /*Output vars*/ cl_device_id DeviceID; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 53299ec..4bac68c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -451,8 +451,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; @@ -483,9 +485,10 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_GLOBAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll for(int i = 0; i < 25; ++i) states[i] = State[i]; @@ -499,9 +502,10 @@ 
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_LOCAL_MEM_FENCE); - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -536,9 +540,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre barrier(CLK_LOCAL_MEM_FENCE); uint4 b_x; - +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -559,8 +564,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre mem_fence(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 8 for(int i = 0; i < ITERATIONS; ++i) @@ -612,8 +619,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { states += 25 * gIdx; #if(STRIDED_INDEX==0) @@ -641,8 +650,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { #pragma unroll 2 for(int i = 0; i < (ITERATIONS >> 5); ++i) @@ -659,8 +670,10 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_GLOBAL_MEM_FENCE); +#if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) +#endif { if(!get_local_id(1)) { diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index b88d3ee..8d60b94 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -143,7 +143,8 @@ private: // set 8 threads per block (this is a good value for the most gpus) conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " 
+ std::to_string(8) + ",\n" + - " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 4\n" + " \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 4,\n" + " \"comp_mode\" : true\n" + " },\n"; } else diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 8914130..84251c7 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -1,9 +1,9 @@ R"===( /* * GPU configuration. You should play around with intensity and worksize as the fastest settings will vary. - * index - GPU index number usually starts from 0 - * intensity - Number of parallel GPU threads (nothing to do with CPU threads) - * worksize - Number of local GPU threads (nothing to do with CPU threads) + * index - GPU index number usually starts from 0 + * intensity - Number of parallel GPU threads (nothing to do with CPU threads) + * worksize - Number of local GPU threads (nothing to do with CPU threads) * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. * strided_index - switch memory pattern used for the scratch pad memory * 2 = chunked memory, chunk size is controlled by 'mem_chunk' @@ -13,9 +13,13 @@ R"===( * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk * this value is only used if 'strided_index' == 2 * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) + * comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows + * to use a intensity which is not the multiple of the worksize. + * If you set false and the intensity is not multiple of the worksize the miner can crash: + * in this case set the intensity to a multiple of the worksize or activate comp_mode. 
* "gpu_threads_conf" : * [ - * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 4 }, + * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 4, "comp_mode" : true }, * ], * If you do not wish to mine with your AMD GPU(s) then use: * "gpu_threads_conf" : diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index 22381e1..93ba709 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -106,15 +106,17 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk; + const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk, *compMode; idx = GetObjectMember(oThdConf, "index"); intensity = GetObjectMember(oThdConf, "intensity"); w_size = GetObjectMember(oThdConf, "worksize"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); stridedIndex = GetObjectMember(oThdConf, "strided_index"); memChunk = GetObjectMember(oThdConf, "mem_chunk"); + compMode = GetObjectMember(oThdConf, "comp_mode"); - if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || stridedIndex == nullptr || memChunk == nullptr) + if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || memChunk == nullptr || + stridedIndex == nullptr || compMode == nullptr) return false; if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) @@ -148,9 +150,13 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) return false; } + if(!compMode->IsBool()) + return false; + cfg.index = idx->GetUint64(); cfg.w_size = w_size->GetUint64(); cfg.intensity = intensity->GetUint64(); + cfg.compMode = compMode->GetBool(); if(aff->IsNumber()) cfg.cpu_aff = aff->GetInt64(); diff --git a/xmrstak/backend/amd/jconf.hpp b/xmrstak/backend/amd/jconf.hpp index 91e5d0d..580b69f 100644 --- 
a/xmrstak/backend/amd/jconf.hpp +++ b/xmrstak/backend/amd/jconf.hpp @@ -28,6 +28,7 @@ public: long long cpu_aff; int stridedIndex; int memChunk; + bool compMode; }; size_t GetThreadCount(); diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index ca5e163..8dfbce5 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -98,6 +98,7 @@ bool minethd::init_gpus() vGpuData[i].workSize = cfg.w_size; vGpuData[i].stridedIndex = cfg.stridedIndex; vGpuData[i].memChunk = cfg.memChunk; + vGpuData[i].compMode = cfg.compMode; } return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; -- cgit v1.1 From c975def43e6e8f5a776b872ec089326fd319c0d7 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 19 Feb 2018 22:41:08 +0100 Subject: fix compile with OpenCL 1.1 guard error types those are only defined in OpenCL >1.1 --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 054ffc4..f8f8a6e 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -84,6 +84,7 @@ const char* err_to_str(cl_int ret) return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; +#ifdef CL_VERSION_1_2 case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; case CL_LINKER_NOT_AVAILABLE: @@ -94,6 +95,7 @@ const char* err_to_str(cl_int ret) return "CL_DEVICE_PARTITION_FAILED"; case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; +#endif case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; case CL_INVALID_DEVICE_TYPE: @@ -164,6 +166,7 @@ const char* err_to_str(cl_int ret) return "CL_INVALID_GLOBAL_WORK_SIZE"; case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; +#ifdef CL_VERSION_1_2 case CL_INVALID_IMAGE_DESCRIPTOR: return 
"CL_INVALID_IMAGE_DESCRIPTOR"; case CL_INVALID_COMPILER_OPTIONS: @@ -172,6 +175,7 @@ const char* err_to_str(cl_int ret) return "CL_INVALID_LINKER_OPTIONS"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#endif #if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2) case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE"; -- cgit v1.1 From dc4e3793454a8ac7ae85704e7997d878c378b0aa Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 21 Feb 2018 22:23:00 +0100 Subject: fix broken memchunk feature fix double definition of define `MEM_CHUNK` --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 2 +- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 2 +- xmrstak/backend/amd/autoAdjust.hpp | 2 +- xmrstak/backend/amd/config.tpl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 8c4a40d..95d30f7 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -336,7 +336,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ char options[256]; snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK=%d -DCOMP_MODE=%d", + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d", hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 
1 : 0); ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); if(ret != CL_SUCCESS) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index dbe8991..9383b04 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -416,7 +416,7 @@ void AESExpandKey256(uint *keybuf) } } -#define MEM_CHUNK (1<<4) +#define MEM_CHUNK (1< Date: Wed, 28 Feb 2018 23:33:23 +0100 Subject: CUDA: reduce startup time - reduce startup time for multi gpu systems - initialize the GPU memory non concurrent --- xmrstak/backend/nvidia/minethd.cpp | 41 ++++++++++++++++++++++++++------------ xmrstak/backend/nvidia/minethd.hpp | 10 +++++++--- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 9fd08fb..867a998 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -80,14 +80,22 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) ctx.syncMode = cfg.syncMode; this->affinity = cfg.cpu_aff; - std::unique_lock lck(thd_aff_set); - std::future order_guard = order_fix.get_future(); + std::future numa_guard = numa_promise.get_future(); + thread_work_guard = thread_work_promise.get_future(); oWorkThd = std::thread(&minethd::work_main, this); - order_guard.wait(); + /* Wait until the gpu memory is initialized and numa cpu memory is pinned. + * The startup time is reduced if the memory is initialized in sequential order + * without concurrent threads (CUDA driver is less occupied). 
+ */ + numa_guard.wait(); +} - if(affinity >= 0) //-1 means no affinity +void minethd::start_mining() +{ + thread_work_promise.set_value(); + if(this->affinity >= 0) //-1 means no affinity if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity)) printer::inst()->print_msg(L1, "WARNING setting affinity failed."); } @@ -179,6 +187,11 @@ std::vector* minethd::thread_starter(uint32_t threadOffset, miner_wor } + for (i = 0; i < n; i++) + { + static_cast((*pvThreads)[i])->start_mining(); + } + return pvThreads; } @@ -208,10 +221,18 @@ void minethd::work_main() if(affinity >= 0) //-1 means no affinity bindMemoryToNUMANode(affinity); - order_fix.set_value(); - std::unique_lock lck(thd_aff_set); - lck.release(); + if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1) + { + printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo); + std::exit(0); + } + + // numa memory bind and gpu memory is initialized + numa_promise.set_value(); + std::this_thread::yield(); + // wait until all NVIDIA devices are initialized + thread_work_guard.wait(); uint64_t iCount = 0; cryptonight_ctx* cpu_ctx; @@ -221,12 +242,6 @@ void minethd::work_main() globalStates::inst().iConsumeCnt++; - if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1) - { - printer::inst()->print_msg(L0, "Setup failed for GPU %d. 
Exitting.\n", (int)iThreadNo); - std::exit(0); - } - bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero"); while (bQuit == 0) diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp index d13c868..fcd24fa 100644 --- a/xmrstak/backend/nvidia/minethd.hpp +++ b/xmrstak/backend/nvidia/minethd.hpp @@ -32,7 +32,8 @@ private: typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg); - + void start_mining(); + void work_main(); void consume_work(); @@ -44,8 +45,11 @@ private: static miner_work oGlobalWork; miner_work oWork; - std::promise order_fix; - std::mutex thd_aff_set; + std::promise numa_promise; + std::promise thread_work_promise; + + // block thread until all NVIDIA GPUs are initialized + std::future thread_work_guard; std::thread oWorkThd; int64_t affinity; -- cgit v1.1 From e13395e1417ced3804e20f44b64789556993020a Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Sat, 3 Mar 2018 12:12:51 -0800 Subject: Comment cleanup --- xmrstak/backend/cpu/config.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl index fc4acb9..cb4b950 100644 --- a/xmrstak/backend/cpu/config.tpl +++ b/xmrstak/backend/cpu/config.tpl @@ -2,7 +2,7 @@ R"===( /* * Thread configuration for each thread. Make sure it matches the number above. * low_power_mode - This can either be a boolean (true or false), or a number between 1 to 5. When set to true, - this mode will double the cache usage, and double the single thread performance. It will + * this mode will double the cache usage, and double the single thread performance. It will * consume much less power (as less cores are working), but will max out at around 80-85% of * the maximum performance. When set to a number N greater than 1, this mode will increase the * cache usage and single thread performance by N times. 
-- cgit v1.1 From db866b30decad0cc052568feb94d8cfd3ec5d8c9 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 3 Mar 2018 22:30:05 +0100 Subject: add header only ssh2 generator - add project picosha2 - add license notes for picosha2 --- THIRD-PARTY-LICENSES | 5 + xmrstak/picosha2/picosha2.hpp | 375 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 xmrstak/picosha2/picosha2.hpp diff --git a/THIRD-PARTY-LICENSES b/THIRD-PARTY-LICENSES index 3e62013..d3202f4 100644 --- a/THIRD-PARTY-LICENSES +++ b/THIRD-PARTY-LICENSES @@ -22,3 +22,8 @@ License: MIT License and BSD License ------------------------------------------------------------------------- +Package: PicoSHA2 +Authors: okdshin +License: MIT License + +------------------------------------------------------------------------- diff --git a/xmrstak/picosha2/picosha2.hpp b/xmrstak/picosha2/picosha2.hpp new file mode 100644 index 0000000..b9daec6 --- /dev/null +++ b/xmrstak/picosha2/picosha2.hpp @@ -0,0 +1,375 @@ +/* +The MIT License (MIT) + +Copyright (C) 2017 okdshin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef PICOSHA2_H +#define PICOSHA2_H +// picosha2:20140213 + +#ifndef PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR +#define PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR \ + 1048576 //=1024*1024: default is 1MB memory +#endif + +#include +#include +#include +#include +#include + +namespace picosha2 { +typedef unsigned long word_t; +typedef unsigned char byte_t; + +static const size_t k_digest_size = 32; + +namespace detail { +inline byte_t mask_8bit(byte_t x) { return x & 0xff; } + +inline word_t mask_32bit(word_t x) { return x & 0xffffffff; } + +const word_t add_constant[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + +const word_t initial_message_digest[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, + 0xa54ff53a, 0x510e527f, 0x9b05688c, + 0x1f83d9ab, 0x5be0cd19}; + +inline word_t ch(word_t x, word_t y, word_t z) { return (x & y) ^ ((~x) & z); } + +inline word_t maj(word_t x, word_t y, word_t z) { + return (x & y) ^ (x & z) ^ (y & z); +} + +inline word_t rotr(word_t x, 
std::size_t n) { + assert(n < 32); + return mask_32bit((x >> n) | (x << (32 - n))); +} + +inline word_t bsig0(word_t x) { return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22); } + +inline word_t bsig1(word_t x) { return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25); } + +inline word_t shr(word_t x, std::size_t n) { + assert(n < 32); + return x >> n; +} + +inline word_t ssig0(word_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3); } + +inline word_t ssig1(word_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10); } + +template +void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) { + assert(first + 64 == last); + static_cast(last); // for avoiding unused-variable warning + word_t w[64]; + std::fill(w, w + 64, 0); + for (std::size_t i = 0; i < 16; ++i) { + w[i] = (static_cast(mask_8bit(*(first + i * 4))) << 24) | + (static_cast(mask_8bit(*(first + i * 4 + 1))) << 16) | + (static_cast(mask_8bit(*(first + i * 4 + 2))) << 8) | + (static_cast(mask_8bit(*(first + i * 4 + 3)))); + } + for (std::size_t i = 16; i < 64; ++i) { + w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + + w[i - 16]); + } + + word_t a = *message_digest; + word_t b = *(message_digest + 1); + word_t c = *(message_digest + 2); + word_t d = *(message_digest + 3); + word_t e = *(message_digest + 4); + word_t f = *(message_digest + 5); + word_t g = *(message_digest + 6); + word_t h = *(message_digest + 7); + + for (std::size_t i = 0; i < 64; ++i) { + word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i]; + word_t temp2 = bsig0(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = mask_32bit(d + temp1); + d = c; + c = b; + b = a; + a = mask_32bit(temp1 + temp2); + } + *message_digest += a; + *(message_digest + 1) += b; + *(message_digest + 2) += c; + *(message_digest + 3) += d; + *(message_digest + 4) += e; + *(message_digest + 5) += f; + *(message_digest + 6) += g; + *(message_digest + 7) += h; + for (std::size_t i = 0; i < 8; ++i) { + *(message_digest + i) = 
mask_32bit(*(message_digest + i)); + } +} + +} // namespace detail + +template +void output_hex(InIter first, InIter last, std::ostream& os) { + os.setf(std::ios::hex, std::ios::basefield); + while (first != last) { + os.width(2); + os.fill('0'); + os << static_cast(*first); + ++first; + } + os.setf(std::ios::dec, std::ios::basefield); +} + +template +void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) { + std::ostringstream oss; + output_hex(first, last, oss); + hex_str.assign(oss.str()); +} + +template +void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) { + bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str); +} + +template +std::string bytes_to_hex_string(InIter first, InIter last) { + std::string hex_str; + bytes_to_hex_string(first, last, hex_str); + return hex_str; +} + +template +std::string bytes_to_hex_string(const InContainer& bytes) { + std::string hex_str; + bytes_to_hex_string(bytes, hex_str); + return hex_str; +} + +class hash256_one_by_one { + public: + hash256_one_by_one() { init(); } + + void init() { + buffer_.clear(); + std::fill(data_length_digits_, data_length_digits_ + 4, 0); + std::copy(detail::initial_message_digest, + detail::initial_message_digest + 8, h_); + } + + template + void process(RaIter first, RaIter last) { + add_to_data_length(std::distance(first, last)); + std::copy(first, last, std::back_inserter(buffer_)); + std::size_t i = 0; + for (; i + 64 <= buffer_.size(); i += 64) { + detail::hash256_block(h_, buffer_.begin() + i, + buffer_.begin() + i + 64); + } + buffer_.erase(buffer_.begin(), buffer_.begin() + i); + } + + void finish() { + byte_t temp[64]; + std::fill(temp, temp + 64, 0); + std::size_t remains = buffer_.size(); + std::copy(buffer_.begin(), buffer_.end(), temp); + temp[remains] = 0x80; + + if (remains > 55) { + std::fill(temp + remains + 1, temp + 64, 0); + detail::hash256_block(h_, temp, temp + 64); + std::fill(temp, temp + 64 - 4, 0); + } else { + std::fill(temp + 
remains + 1, temp + 64 - 4, 0); + } + + write_data_bit_length(&(temp[56])); + detail::hash256_block(h_, temp, temp + 64); + } + + template + void get_hash_bytes(OutIter first, OutIter last) const { + for (const word_t* iter = h_; iter != h_ + 8; ++iter) { + for (std::size_t i = 0; i < 4 && first != last; ++i) { + *(first++) = detail::mask_8bit( + static_cast((*iter >> (24 - 8 * i)))); + } + } + } + + private: + void add_to_data_length(word_t n) { + word_t carry = 0; + data_length_digits_[0] += n; + for (std::size_t i = 0; i < 4; ++i) { + data_length_digits_[i] += carry; + if (data_length_digits_[i] >= 65536u) { + carry = data_length_digits_[i] >> 16; + data_length_digits_[i] &= 65535u; + } else { + break; + } + } + } + void write_data_bit_length(byte_t* begin) { + word_t data_bit_length_digits[4]; + std::copy(data_length_digits_, data_length_digits_ + 4, + data_bit_length_digits); + + // convert byte length to bit length (multiply 8 or shift 3 times left) + word_t carry = 0; + for (std::size_t i = 0; i < 4; ++i) { + word_t before_val = data_bit_length_digits[i]; + data_bit_length_digits[i] <<= 3; + data_bit_length_digits[i] |= carry; + data_bit_length_digits[i] &= 65535u; + carry = (before_val >> (16 - 3)) & 65535u; + } + + // write data_bit_length + for (int i = 3; i >= 0; --i) { + (*begin++) = static_cast(data_bit_length_digits[i] >> 8); + (*begin++) = static_cast(data_bit_length_digits[i]); + } + } + std::vector buffer_; + word_t data_length_digits_[4]; // as 64bit integer (16bit x 4 integer) + word_t h_[8]; +}; + +inline void get_hash_hex_string(const hash256_one_by_one& hasher, + std::string& hex_str) { + byte_t hash[k_digest_size]; + hasher.get_hash_bytes(hash, hash + k_digest_size); + return bytes_to_hex_string(hash, hash + k_digest_size, hex_str); +} + +inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) { + std::string hex_str; + get_hash_hex_string(hasher, hex_str); + return hex_str; +} + +namespace impl { +template +void 
hash256_impl(RaIter first, RaIter last, OutIter first2, OutIter last2, int, + std::random_access_iterator_tag) { + hash256_one_by_one hasher; + // hasher.init(); + hasher.process(first, last); + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} + +template +void hash256_impl(InputIter first, InputIter last, OutIter first2, + OutIter last2, int buffer_size, std::input_iterator_tag) { + std::vector buffer(buffer_size); + hash256_one_by_one hasher; + // hasher.init(); + while (first != last) { + int size = buffer_size; + for (int i = 0; i != buffer_size; ++i, ++first) { + if (first == last) { + size = i; + break; + } + buffer[i] = *first; + } + hasher.process(buffer.begin(), buffer.begin() + size); + } + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} +} + +template +void hash256(InIter first, InIter last, OutIter first2, OutIter last2, + int buffer_size = PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR) { + picosha2::impl::hash256_impl( + first, last, first2, last2, buffer_size, + typename std::iterator_traits::iterator_category()); +} + +template +void hash256(InIter first, InIter last, OutContainer& dst) { + hash256(first, last, dst.begin(), dst.end()); +} + +template +void hash256(const InContainer& src, OutIter first, OutIter last) { + hash256(src.begin(), src.end(), first, last); +} + +template +void hash256(const InContainer& src, OutContainer& dst) { + hash256(src.begin(), src.end(), dst.begin(), dst.end()); +} + +template +void hash256_hex_string(InIter first, InIter last, std::string& hex_str) { + byte_t hashed[k_digest_size]; + hash256(first, last, hashed, hashed + k_digest_size); + std::ostringstream oss; + output_hex(hashed, hashed + k_digest_size, oss); + hex_str.assign(oss.str()); +} + +template +std::string hash256_hex_string(InIter first, InIter last) { + std::string hex_str; + hash256_hex_string(first, last, hex_str); + return hex_str; +} + +inline void hash256_hex_string(const std::string& src, std::string& hex_str) { + 
hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +void hash256_hex_string(const InContainer& src, std::string& hex_str) { + hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +std::string hash256_hex_string(const InContainer& src) { + return hash256_hex_string(src.begin(), src.end()); +} + +} // namespace picosha2 + +#endif // PICOSHA2_H \ No newline at end of file -- cgit v1.1 From 6beb3f59fe03f635082066af01282007303695b8 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sat, 3 Mar 2018 22:39:14 +0100 Subject: add OpenCL compiler cache Reduce OpenCL start time by using a self made compiler cache. - store compiled OpenCL binary - load OpenCl binary if available --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 211 +++++++++++++++++++++++++++++++----- 1 file changed, 182 insertions(+), 29 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 95d30f7..79afa00 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -15,6 +15,7 @@ #include "xmrstak/backend/cryptonight.hpp" #include "xmrstak/jconf.hpp" +#include "xmrstak/picosha2/picosha2.hpp" #include #include @@ -25,8 +26,41 @@ #include #include +#include +#include +#include +#include +#include + +#if defined _MSC_VER +#include +#elif defined __GNUC__ +#include +#include +#endif + + + #ifdef _WIN32 #include +#include + +static inline void create_directory(std::string dirname) +{ + _mkdir(dirname.data()); +} + +static inline std::string get_home() +{ + char path[MAX_PATH + 1]; + // get folder "appdata\local" + if (SHGetSpecialFolderPathA(HWND_DESKTOP, path, CSIDL_LOCAL_APPDATA, FALSE)) + { + return path; + } + else + return "."; +} static inline void port_sleep(size_t sec) { @@ -34,6 +68,22 @@ static inline void port_sleep(size_t sec) } #else #include +#include + +static inline void create_directory(std::string dirname) +{ + mkdir(dirname.data(), 0744); +} + +static inline std::string get_home() +{ + 
const char *home = "."; + + if ((home = getenv("HOME")) == nullptr) + home = getpwuid(getuid())->pw_dir; + + return home; +} static inline void port_sleep(size_t sec) { @@ -327,57 +377,157 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); - if(ret != CL_SUCCESS) + std::vector devNameVec(1024); + if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the contents of cryptonight.cl", err_to_str(ret)); + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(ret),ctx->deviceIdx ); return ERR_OCL_API; } char options[256]; - snprintf(options, sizeof(options), + snprintf(options, sizeof(options), "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d", hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 
1 : 0); - ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); - if(ret != CL_SUCCESS) - { - size_t len; - printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + /* create a hash for the compile time cache + * used data: + * - source code + * - device name + * - compile paramater + */ + std::string src_str(source_code); + src_str += options; + src_str += devNameVec.data(); + std::string hash_hex_str; + picosha2::hash256_hex_string(src_str, hash_hex_str); + + std::string cache_file = get_home() + "/.openclcache/" + hash_hex_str + ".openclbin"; + std::ifstream clBinFile(cache_file, std::ofstream::in | std::ofstream::binary); + if(!clBinFile.good()) + { + printer::inst()->print_msg(L1,"WARNING: OpenCL device %u - OpenCL binary %s not found.",ctx->deviceIdx, cache_file.c_str()); + ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret)); return ERR_OCL_API; } - char* BuildLog = (char*)malloc(len + 1); - BuildLog[0] = '\0'; - - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); + if(ret != CL_SUCCESS) { + size_t len; + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", 
err_to_str(ret)); + return ERR_OCL_API; + } + + char* BuildLog = (char*)malloc(len + 1); + BuildLog[0] = '\0'; + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + { + free(BuildLog); + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_str("Build log:\n"); + std::cerr<print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); return ERR_OCL_API; } - - printer::inst()->print_str("Build log:\n"); - std::cerr<Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL); + + + std::vector devices_ids(num_devices); + clGetProgramInfo(ctx->Program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL); + int dev_id = 0; + /* Search for the gpu within the program context. + * The id can be different to ctx->DeviceID. + */ + for(auto & ocl_device : devices_ids) + { + if(ocl_device == ctx->DeviceID) + break; + dev_id++; + } + + cl_build_status status; + do + { + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + return ERR_OCL_API; + } + port_sleep(1); + } + while(status == CL_BUILD_IN_PROGRESS); + + std::vector binary_sizes(num_devices); + clGetProgramInfo (ctx->Program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL); + + std::vector all_programs(num_devices); + std::vector> program_storage; - cl_build_status status; - do + int p_id = 0; + size_t mem_size = 0; + // create memory structure to query all OpenCL program binaries + for(auto & p : all_programs) + { + program_storage.emplace_back(std::vector(binary_sizes[p_id])); + all_programs[p_id] = 
program_storage[p_id].data(); + mem_size += binary_sizes[p_id]; + p_id++; + } + + if( ret = clGetProgramInfo(ctx->Program, CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret)); + return ERR_OCL_API; + } + + std::ofstream file_stream; + std::cout<print_msg(L1, "OpenCL device %u - OpenCL binary file stored in file %s.",ctx->deviceIdx, cache_file.c_str()); + } + else { - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + printer::inst()->print_msg(L1, "OpenCL device %u - Load OpenCL binary file %s",ctx->deviceIdx, cache_file.c_str()); + std::ostringstream ss; + ss << clBinFile.rdbuf(); + std::string s = ss.str(); + + size_t bin_size = s.size(); + auto data_ptr = s.data(); + + cl_int clStatus; + ctx->Program = clCreateProgramWithBinary( + opencl_ctx, 1, &ctx->DeviceID, &bin_size, + (const unsigned char **)&data_ptr, &clStatus, &ret + ); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str()); + return ERR_OCL_API; + } + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, NULL, NULL, NULL); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. 
Try to delete file %s", err_to_str(ret), cache_file.c_str()); return ERR_OCL_API; } - port_sleep(1); } - while(status == CL_BUILD_IN_PROGRESS); const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; for(int i = 0; i < 7; ++i) @@ -491,7 +641,7 @@ std::vector getAMDDevices(int index) printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name for device %u.", err_to_str(clStatus), k); continue; } - + std::string devVendor(devVendorVec.data()); if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) { @@ -716,6 +866,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_BLAKE256"), blake256CL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_GROESTL256"), groestl256CL); + // create a directory for the OpenCL compile cache + create_directory(get_home() + "/.openclcache"); + for(int i = 0; i < num_gpus; ++i) { if(ctx[i].stridedIndex == 2 && (ctx[i].rawIntensity % ctx[i].workSize) != 0) -- cgit v1.1 From dc0524c2d9efed2eb6fdf636e26a205e69e615f8 Mon Sep 17 00:00:00 2001 From: Jimmie Lin Date: Sun, 11 Mar 2018 00:32:42 +0800 Subject: Fix non-standard HTML in div (#1122) --- xmrstak/http/webdesign.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xmrstak/http/webdesign.cpp b/xmrstak/http/webdesign.cpp index c31d6c6..c2f0761 100644 --- a/xmrstak/http/webdesign.cpp +++ b/xmrstak/http/webdesign.cpp @@ -152,7 +152,7 @@ extern const char sHtmlMotdEntry[] = "
Message from %s" + "
" "" ""; @@ -166,7 +166,7 @@ extern const char sHtmlHashrateBodyLow [] = ""; extern const char sHtmlConnectionBodyHigh [] = - "
" + "
" "
Thread ID10s60s15mH/s
" "" "" @@ -183,7 +183,7 @@ extern const char sHtmlConnectionBodyLow [] = "
Pool address%s
Connected since%s
"; extern const char sHtmlResultBodyHigh [] = - "
" + "
" "" "" "" -- cgit v1.1 From 702ab5669912dd1bc9c15f9dd6854889cc09af5e Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 13 Mar 2018 20:32:07 +0100 Subject: fix shadowed variable A redefinition of a variable in a local scope avoid that the intensity is rounded to a multiple of the work size. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 79afa00..c45f211 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -1053,7 +1053,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) if(ctx->compMode) { // round up to next multiple of w_size - size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; // number of global threads must be a multiple of the work group size (w_size) assert(g_thd%w_size == 0); } -- cgit v1.1 From 83dbe2b97e2fdafbb84f3078f7f47343ec674246 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Tue, 13 Mar 2018 20:41:28 +0100 Subject: reenable MACOSX travis tests - remove `brew trap science` --- .travis.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4d53d48..de5b45f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -70,16 +70,15 @@ matrix: - CMAKE_C_COMPILER=gcc-7 - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" -# - os: osx -# compiler: gcc -# env: -# - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" + - os: osx + compiler: gcc + env: + - XMRSTAK_CMAKE_FLAGS="-DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF" before_install: - . 
CI/checkPRBranch - - if [ $TRAVIS_OS_NAME = osx ]; then + - if [ $TRAVIS_OS_NAME = osx ] ; then brew update; - brew tap homebrew/science; fi - export PATH=$CUDA_ROOT/bin:$PATH -- cgit v1.1 From 1e7911e653a267ffd71199cdf7afaf1cfed5bad0 Mon Sep 17 00:00:00 2001 From: xmr-stak-devs Date: Sun, 25 Mar 2018 13:21:57 +0100 Subject: XMR-Stak 2.3.0 RC Co-authored-by: psychocrypt Co-authored-by: fireice-uk Co-authored-by: Lee Clagett Co-authored-by: curie-kief --- CMakeLists.txt | 22 +- README.md | 12 +- doc/FAQ.md | 16 + doc/compile.md | 1 - doc/tuning.md | 39 +- doc/usage.md | 5 +- xmrstak/backend/amd/amd_gpu/gpu.cpp | 98 +++-- xmrstak/backend/amd/amd_gpu/gpu.hpp | 7 +- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 273 ++++++++++++- xmrstak/backend/amd/autoAdjust.hpp | 12 +- xmrstak/backend/amd/minethd.cpp | 38 +- xmrstak/backend/cpu/autoAdjustHwloc.hpp | 12 +- xmrstak/backend/cpu/crypto/cryptonight.h | 2 - xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 444 +++++++++++++++++++--- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 24 +- xmrstak/backend/cpu/minethd.cpp | 267 ++++++++----- xmrstak/backend/cpu/minethd.hpp | 5 +- xmrstak/backend/cryptonight.hpp | 125 +++++- xmrstak/backend/miner_work.hpp | 6 + xmrstak/backend/nvidia/minethd.cpp | 38 +- xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp | 10 +- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 231 ++++++++--- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 156 ++++++-- xmrstak/cli/cli-miner.cpp | 239 +++++++----- xmrstak/config.tpl | 23 -- xmrstak/jconf.cpp | 249 ++++++++---- xmrstak/jconf.hpp | 16 +- xmrstak/misc/executor.cpp | 53 ++- xmrstak/net/jpsock.cpp | 51 ++- xmrstak/net/jpsock.hpp | 5 +- xmrstak/net/socket.cpp | 24 +- xmrstak/net/socket.hpp | 4 + xmrstak/net/socks.hpp | 3 +- xmrstak/params.hpp | 5 + xmrstak/pools.tpl | 39 ++ xmrstak/version.cpp | 8 +- 36 files changed, 1963 insertions(+), 599 deletions(-) create mode 100644 xmrstak/pools.tpl diff --git a/CMakeLists.txt b/CMakeLists.txt index 
3b3c7eb..15a2684 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,10 +36,6 @@ if(NOT CMAKE_BUILD_TYPE) endif() set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${BUILD_TYPE}") -set(XMR-STAK_CURRENCY "all" CACHE STRING "select miner currency") -set_property(CACHE XMR-STAK_CURRENCY PROPERTY STRINGS "all;monero;aeon") - - set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture") set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic") if(XMR-STAK_COMPILE STREQUAL "native") @@ -53,16 +49,6 @@ else() message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'") endif() -if(XMR-STAK_CURRENCY STREQUAL "all") - message(STATUS "Set miner currency to 'monero' and 'aeon'") -elseif(XMR-STAK_CURRENCY STREQUAL "aeon") - message(STATUS "Set miner currency to 'aeon'") - add_definitions("-DCONF_NO_MONERO=1") -elseif(XMR-STAK_CURRENCY STREQUAL "monero") - message(STATUS "Set miner currency to 'monero'") - add_definitions("-DCONF_NO_AEON=1") -endif() - # option to add static libgcc and libstdc++ option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF) @@ -438,6 +424,14 @@ else() endif() +# add -Wall for debug builds with gcc +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") + endif() +endif() + # activate static libgcc and libstdc++ linking if(CMAKE_LINK_STATIC) set(BUILD_SHARED_LIBRARIES OFF) diff --git a/README.md b/README.md index bdcc6cf..d1ec255 100644 --- a/README.md +++ b/README.md @@ -38,14 +38,20 @@ XMR-Stak is a universal Stratum pool miner. 
This miner supports CPUs, AMD and NV ## Supported altcoins -Besides Monero, following coins can be mined using this miner: +Besides [Monero](https://getmonero.org), following coins can be mined using this miner: -- [Aeon](http://www.aeon.cash/) +- [Aeon](http://www.aeon.cash) +- [Edollar](https://edollar.cash) - [Electroneum](https://electroneum.com) +- [Graft](https://www.graft.network) - [Intense](https://intensecoin.com) +- [Karbo](https://karbo.io) - [Sumokoin](https://www.sumokoin.org) -For all coins, except Aeon, you can use Monero settings. +If your prefered coin is not listed, you can chose one of the following algorithms: + +- Cryptonight - 2 MiB scratchpad memory +- Cryptonight-light - 1 MiB scratchpad memory Please note, this list is not complete, and is not an endorsement. diff --git a/doc/FAQ.md b/doc/FAQ.md index ffbc36f..f22bb53 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -8,6 +8,9 @@ * [Illegal instruction (core dumped)](#illegal-instruction) * [Virus Protection Alert](#virus-protection-alert) * [Change Currency to Mine](#change-currency-to-mine) +* [How can I mine Monero](#how-can-i-mine-monero) +* [Why is Monero named monero2](why-is-monero-named-monero2) +* [Which currency must be chosen if my fork coin is not listed](#which-currency-must-be-chosen-if-my-fork-coin-is-not-listed) ## "Obtaining SeLockMemoryPrivilege failed." @@ -70,3 +73,16 @@ If your antivirus software flags **xmr-stak**, it will likely move it to its qua If the miner is compiled for Monero and Aeon than you can change - the value `currency` in the config *or* - start the miner with the [command line option](usage.md) `--currency monero` or `--currency aeon` + +## How can I mine Monero + +Set the value `currency` in `pools.txt` to `monero2`. 
+ +## Why is Monero named monero2 + +To avoid configuration conflicts after the hard fork of Monero to the new POW with our old naming schema where all cryptonight currencies was selected by choosing `monero` as currency we decided to switch to the name `monero2`. + +## Which currency must be chosen if my fork coin is not listed + +If your coin you want to mine is not listed please check the documentation of the coin and try to find out if `cryptonight` or `cryptonight-lite` is the used algorithm. +Select one of these generic coin algorithms. diff --git a/doc/compile.md b/doc/compile.md index 771c9d1..984c013 100644 --- a/doc/compile.md +++ b/doc/compile.md @@ -47,7 +47,6 @@ After the configuration you need to compile the miner, follow the guide for your - there is no *http* interface available if option is disabled: `cmake .. -DMICROHTTPD_ENABLE=OFF` - `OpenSSL_ENABLE` allow to disable/enable the dependency *OpenSSL* - it is not possible to connect to a *https* secured pool if option is disabled: `cmake .. -DOpenSSL_ENABLE=OFF` -- `XMR-STAK_CURRENCY` - compile for Monero(XMR) or Aeon(AEON) usage only e.g. `cmake .. -DXMR-STAK_CURRENCY=monero` - `XMR-STAK_COMPILE` select the CPU compute architecture (default: native) - native means the miner binary can be used only on the system where it is compiled but will archive the highest hash rate - use `cmake .. 
-DXMR-STAK_COMPILE=generic` to run the miner on all CPU's with sse2 diff --git a/doc/tuning.md b/doc/tuning.md index 5125387..47ad0bb 100644 --- a/doc/tuning.md +++ b/doc/tuning.md @@ -1,6 +1,7 @@ # Tuning Guide ## Content Overview +* [Benchmark](#benchmark) * [Windows](#windows) * [NVIDIA Backend](#nvidia-backend) * [Choose Value for `threads` and `blocks`](#choose-value-for-threads-and-blocks) @@ -8,11 +9,18 @@ * [AMD Backend](#amd-backend) * [Choose `intensity` and `worksize`](#choose-intensity-and-worksize) * [Add more GPUs](#add-more-gpus) + * [disable comp_mode](#disable-comp_mode) + * [change the scratchpad memory pattern](change-the-scratchpad-memory-pattern) * [Increase Memory Pool](#increase-memory-pool) * [Scratchpad Indexing](#scratchpad-indexing) * [CPU Backend](#cpu-backend) * [Choose Value for `low_power_mode`](#choose-value-for-low_power_mode) +## Benchmark +To benchmark the miner speed there are two ways. + - Mine against a pool end press the key `h` after 30 sec to see the hash report. + - Start the miner with the cli option `--benchmark BLOCKVERSION`. The miner will not connect to any pool and performs a 60sec performance benchmark with all enabled back-ends. + ## Windows "Run As Administrator" prompt (UAC) confirmation is needed to use large pages on Windows 7. On Windows 10 it is only needed once to set up the account to use them. @@ -46,8 +54,12 @@ To add a new GPU you need to add a new config set to `gpu_threads_conf`. 
``` "gpu_threads_conf" : [ - { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false}, - { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false}, + { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, + "affine_to_cpu" : false, "sync_mode" : 3, + }, + { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, + "affine_to_cpu" : false, "sync_mode" : 3, + }, ], ``` @@ -70,13 +82,26 @@ If you are unsure of either GPU or platform index value, you can use `clinfo` to ``` "gpu_threads_conf" : [ - { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false }, - { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false }, + { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, + "strided_index" : true, "mem_chunk" : 2, "comp_mode" : true + }, + { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, + "strided_index" : true, "mem_chunk" : 2, "comp_mode" : true + }, ], "platform_index" : 0, ``` +### disable comp_mode + +`comp_mode` means compatibility mode and removes some checks in compute kernel those takes care that the miner can be used on a wide range of AMD/OpenCL GPU devices. +To avoid miner crashes the `intensity` should be a multiple of `worksize` if `comp_mode` is `false`. + +### change the scratchpad memory pattern + +By changing `strided_index` to `2` the number of contiguous elements (a 16 byte) for one miner thread can be fine tuned with the option `mem_chunk`. + ### Increase Memory Pool By setting the following environment variables before the miner is started OpenCl allows the miner to more threads. 
@@ -84,9 +109,9 @@ This variables must be set each time before the miner is started else it could b ``` export GPU_FORCE_64BIT_PTR=1 -export GPU_MAX_HEAP_SIZE=99 -export GPU_MAX_ALLOC_PERCENT=99 -export GPU_SINGLE_ALLOC_PERCENT=99 +export GPU_MAX_HEAP_SIZE=100 +export GPU_MAX_ALLOC_PERCENT=100 +export GPU_SINGLE_ALLOC_PERCENT=100 ``` *Note:* Windows user must use `set` instead of `export` to define an environment variable. diff --git a/doc/usage.md b/doc/usage.md index a810469..1f1fb09 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -5,7 +5,7 @@ * [Usage on Windows](#usage-on-windows) * [Usage on Linux](#usage-on-linux) * [Command Line Options](#command-line-options) -* [HTML and JSON API report configuraton](#xx) +* [HTML and JSON API report configuraton](#html-and-json-api-report-configuraton) ## Configurations @@ -13,12 +13,13 @@ Before you started the miner the first time there are no config files available. Config files will be created at the first start. The number of files depends on the available backends. `config.txt` contains the common miner settings. +`pools.txt` contains the selected mining pools and currency to mine. `amd.txt`, `cpu.txt` and `nvidia.txt` contains miner backend specific settings and can be used for further tuning ([Tuning Guide](tuning.md)). ## Usage on Windows 1) Double click the `xmr-stak.exe` file -2) Fill in the pool url, username and password +2) Fill in the pool url settings, currency, username and password `set XMRSTAK_NOWAIT=1` disable the dialog `Press any key to exit.` for non UAC execution. 
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index c45f211..7547083 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -306,21 +306,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - size_t hashMemSize; - int threadMemMask; - int hasIterations; - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - threadMemMask = MONERO_MASK; - hasIterations = MONERO_ITER; - } - else - { - hashMemSize = AEON_MEMORY; - threadMemMask = AEON_MASK; - hasIterations = AEON_ITER; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + int threadMemMask = cn_select_mask(::jconf::inst()->GetMiningAlgo()); + int hashIterations = cn_select_iter(::jconf::inst()->GetMiningAlgo()); size_t g_thd = ctx->rawIntensity; ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, hashMemSize * g_thd, NULL, &ret); @@ -384,11 +372,13 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - char options[256]; - snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 1 : 0); + auto miner_algo = ::jconf::inst()->GetMiningAlgo(); + char options[512]; + snprintf(options, sizeof(options), + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d -DMEMORY=%llu -DALGO=%d", + hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<memChunk), ctx->compMode ? 
1 : 0, + int_port(hashMemSize), int(miner_algo)); /* create a hash for the compile time cache * used data: * - source code @@ -529,8 +519,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ } } - const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; - for(int i = 0; i < 7; ++i) + const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein", "cn1_monero" }; + for(int i = 0; i < 8; ++i) { ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret); if(ret != CL_SUCCESS) @@ -887,7 +877,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_SUCCESS; } -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target) +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version) { cl_int ret; @@ -932,29 +922,65 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return(ERR_OCL_API); } - // CN2 Kernel + if(miner_algo == cryptonight_heavy) + { + // version + if ((ret = clSetKernelArg(ctx->Kernels[0], 4, sizeof(cl_uint), &version)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 4.", err_to_str(ret)); + return ERR_OCL_API; + } + } + + // CN1 Kernel + + /// @todo only activate if currency is monero + int cn_kernel_offset = 0; + if(miner_algo == cryptonight_monero && version >= 7) + { + cn_kernel_offset = 6; + } // Scratchpads - if((ret = clSetKernelArg(ctx->Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret)); return ERR_OCL_API; } // States - if((ret = clSetKernelArg(ctx->Kernels[1], 1, sizeof(cl_mem), 
ctx->ExtraBuffers + 1)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret)); return ERR_OCL_API; } // Threads - if((ret = clSetKernelArg(ctx->Kernels[1], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) + if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret)); return(ERR_OCL_API); } + if(miner_algo == cryptonight_monero && version >= 7) + { + // Input + if ((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, arugment 4(input buffer).", err_to_str(ret)); + return ERR_OCL_API; + } + } + else if(miner_algo == cryptonight_heavy) + { + // version + if ((ret = clSetKernelArg(ctx->Kernels[1], 3, sizeof(cl_uint), &version)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 3 (version).", err_to_str(ret)); + return ERR_OCL_API; + } + } + // CN3 Kernel // Scratchpads if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) @@ -1005,6 +1031,16 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return(ERR_OCL_API); } + if(miner_algo == cryptonight_heavy) + { + // version + if ((ret = clSetKernelArg(ctx->Kernels[2], 7, sizeof(cl_uint), &version)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 7.", err_to_str(ret)); + return ERR_OCL_API; + } + } + for(int i = 0; i < 4; ++i) { // States @@ -1039,7 +1075,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t 
input_len, uint64_t tar return ERR_SUCCESS; } -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version) { cl_int ret; cl_uint zero = 0; @@ -1092,7 +1128,13 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) }*/ size_t tmpNonce = ctx->Nonce; - if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + /// @todo only activate if currency is monero + int cn_kernel_offset = 0; + if(miner_algo == cryptonight_monero && version >= 7) + { + cn_kernel_offset = 6; + } + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1 + cn_kernel_offset], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1); return ERR_OCL_API; diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index 8fb7168..a387b15 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -1,6 +1,7 @@ #pragma once #include "xmrstak/misc/console.hpp" +#include "xmrstak/jconf.hpp" #if defined(__APPLE__) #include @@ -35,7 +36,7 @@ struct GpuContext cl_mem OutputBuffer; cl_mem ExtraBuffers[6]; cl_program Program; - cl_kernel Kernels[7]; + cl_kernel Kernels[8]; size_t freeMem; int computeUnits; std::string name; @@ -49,7 +50,7 @@ int getAMDPlatformIdx(); std::vector getAMDDevices(int index); size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target); -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version); +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version); diff --git 
a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 9383b04..7a36357 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -433,8 +433,18 @@ inline ulong getIdx() #endif } +inline uint4 mix_and_propagate(__local uint4 xin[8][WORKSIZE]) +{ + return xin[(get_local_id(1)) % 8][get_local_id(0)] ^ xin[(get_local_id(1) + 1) % 8][get_local_id(0)]; +} + __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) -__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads) +__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads +// cryptonight_heavy +#if (ALGO == 4) + , uint version +#endif +) { ulong State[25]; uint ExpandedKey1[40]; @@ -464,11 +474,11 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul states += 25 * gIdx; #if(STRIDED_INDEX==0) - Scratchpad += gIdx * (ITERATIONS >> 2); + Scratchpad += gIdx * (MEMORY >> 4); #elif(STRIDED_INDEX==1) Scratchpad += gIdx; #elif(STRIDED_INDEX==2) - Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); + Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif ((ulong8 *)State)[0] = vload8(0, input); @@ -507,13 +517,41 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul } mem_fence(CLK_LOCAL_MEM_FENCE); + +// cryptonight_heavy +#if (ALGO == 4) + if(version >= 3) + { + __local uint4 xin[8][WORKSIZE]; + + /* Also left over threads performe this loop. 
+ * The left over thread results will be ignored + */ + for(size_t i=0; i < 16; i++) + { + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey1)[j]); + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); + } + } +#endif + #if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) #endif { + int iterations = MEMORY >> 7; +#if (ALGO == 4) + if(version < 3) + iterations >>= 1; +#endif #pragma unroll 2 - for(int i = 0; i < (ITERATIONS >> 5); ++i) + for(int i = 0; i < iterations; ++i) { #pragma unroll for(int j = 0; j < 10; ++j) @@ -525,8 +563,22 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul mem_fence(CLK_GLOBAL_MEM_FENCE); } +#define VARIANT1_1(p) \ + uint table = 0x75310U; \ + uint index = (((p).s2 >> 26) & 12) | (((p).s2 >> 23) & 2); \ + (p).s2 ^= ((table >> index) & 0x30U) << 24 + +#define VARIANT1_2(p) ((uint2 *)&(p))[0] ^= tweak1_2 + +#define VARIANT1_INIT() \ + tweak1_2 = as_uint2(input[4]); \ + tweak1_2.s0 >>= 24; \ + tweak1_2.s0 |= tweak1_2.s1 << 8; \ + tweak1_2.s1 = get_global_id(0); \ + tweak1_2 ^= as_uint2(states[24]) + __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Threads) +__kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulong Threads, __global ulong *input) { ulong a[2], b[2]; __local uint AES0[256], AES1[256], AES2[256], AES3[256]; @@ -544,6 +596,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre barrier(CLK_LOCAL_MEM_FENCE); + uint2 tweak1_2; uint4 b_x; #if(COMP_MODE==1) // do not use early return here @@ -552,11 +605,11 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre { states += 25 * gIdx; #if(STRIDED_INDEX==0) - Scratchpad += gIdx * (ITERATIONS >> 2); + Scratchpad += gIdx 
* (MEMORY >> 4); #elif(STRIDED_INDEX==1) Scratchpad += gIdx; #elif(STRIDED_INDEX==2) - Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); + Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif a[0] = states[0] ^ states[4]; @@ -565,6 +618,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre b[1] = states[3] ^ states[7]; b_x = ((uint4 *)b)[0]; + VARIANT1_INIT(); } mem_fence(CLK_LOCAL_MEM_FENCE); @@ -581,9 +635,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre ((uint4 *)c)[0] = Scratchpad[IDX((a[0] & MASK) >> 4)]; ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); - //b_x ^= ((uint4 *)c)[0]; - Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0]; + b_x ^= ((uint4 *)c)[0]; + VARIANT1_1(b_x); + Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x; uint4 tmp; tmp = Scratchpad[IDX((c[0] & MASK) >> 4)]; @@ -591,18 +646,129 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre a[1] += c[0] * as_ulong2(tmp).s0; a[0] += mul_hi(c[0], as_ulong2(tmp).s0); + VARIANT1_2(a[1]); Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; + VARIANT1_2(a[1]); + + ((uint4 *)a)[0] ^= tmp; + + b_x = ((uint4 *)c)[0]; + } + } + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Threads +// cryptonight_heavy +#if (ALGO == 4) + , uint version +#endif +) +{ + ulong a[2], b[2]; + __local uint AES0[256], AES1[256], AES2[256], AES3[256]; + + const ulong gIdx = getIdx(); + + for(int i = get_local_id(0); i < 256; i += WORKSIZE) + { + const uint tmp = AES0_C[i]; + AES0[i] = tmp; + AES1[i] = rotate(tmp, 8U); + AES2[i] = rotate(tmp, 16U); + AES3[i] = rotate(tmp, 24U); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + uint4 b_x; +#if(COMP_MODE==1) + // do not use early return here + 
if(gIdx < Threads) +#endif + { + states += 25 * gIdx; +#if(STRIDED_INDEX==0) + Scratchpad += gIdx * (MEMORY >> 4); +#elif(STRIDED_INDEX==1) + Scratchpad += gIdx; +#elif(STRIDED_INDEX==2) + Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); +#endif + + a[0] = states[0] ^ states[4]; + b[0] = states[2] ^ states[6]; + a[1] = states[1] ^ states[5]; + b[1] = states[3] ^ states[7]; + + b_x = ((uint4 *)b)[0]; + } + + mem_fence(CLK_LOCAL_MEM_FENCE); + +#if(COMP_MODE==1) + // do not use early return here + if(gIdx < Threads) +#endif + { + ulong idx0 = a[0]; + ulong mask = MASK; + + int iterations = ITERATIONS; +#if (ALGO == 4) + if(version < 3) + { + iterations <<= 1; + mask -= 0x200000; + } +#endif + #pragma unroll 8 + for(int i = 0; i < iterations; ++i) + { + ulong c[2]; + + ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & mask) >> 4)]; + ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); + //b_x ^= ((uint4 *)c)[0]; + + Scratchpad[IDX((idx0 & mask) >> 4)] = b_x ^ ((uint4 *)c)[0]; + + uint4 tmp; + tmp = Scratchpad[IDX((c[0] & mask) >> 4)]; + + a[1] += c[0] * as_ulong2(tmp).s0; + a[0] += mul_hi(c[0], as_ulong2(tmp).s0); + + Scratchpad[IDX((c[0] & mask) >> 4)] = ((uint4 *)a)[0]; ((uint4 *)a)[0] ^= tmp; + idx0 = a[0]; b_x = ((uint4 *)c)[0]; +// cryptonight_heavy +#if (ALGO == 4) + if(version >= 3) + { + long n = *((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4)))); + int d = ((__global int*)(Scratchpad + (IDX((idx0 & mask) >> 4))))[2]; + long q = n / (d | 0x5); + *((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4)))) = n ^ q; + idx0 = d ^ q; + } +#endif } } mem_fence(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) -__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads) +__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global 
uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads +// cryptonight_heavy +#if (ALGO == 4) + , uint version +#endif + ) { __local uint AES0[256], AES1[256], AES2[256], AES3[256]; uint ExpandedKey2[40]; @@ -631,11 +797,11 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u { states += 25 * gIdx; #if(STRIDED_INDEX==0) - Scratchpad += gIdx * (ITERATIONS >> 2); + Scratchpad += gIdx * (MEMORY >> 4); #elif(STRIDED_INDEX==1) Scratchpad += gIdx; #elif(STRIDED_INDEX==2) - Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0); + Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); #endif #if defined(__Tahiti__) || defined(__Pitcairn__) @@ -655,13 +821,67 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u barrier(CLK_LOCAL_MEM_FENCE); +#if (ALGO == 4) + __local uint4 xin[8][WORKSIZE]; +#endif + #if(COMP_MODE==1) // do not use early return here if(gIdx < Threads) #endif { + int iterations = MEMORY >> 7; +#if (ALGO == 4) + if(version < 3) + { + iterations >>= 1; + #pragma unroll 2 + for(int i = 0; i < iterations; ++i) + { + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + + #pragma unroll 10 + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + } + } + else + { + #pragma unroll 2 + for(int i = 0; i < iterations; ++i) + { + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + + #pragma unroll 10 + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + + + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); + } + + #pragma unroll 2 + for(int i = 0; i < iterations; ++i) + { + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + + #pragma unroll 10 + for(int j = 0; j < 10; ++j) + text = 
AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + + + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); + } + } +#else #pragma unroll 2 - for(int i = 0; i < (ITERATIONS >> 5); ++i) + for(int i = 0; i < iterations; ++i) { text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; @@ -669,7 +889,34 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u for(int j = 0; j < 10; ++j) text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); } +#endif + } + +// cryptonight_heavy +#if (ALGO == 4) + if(version >= 3) + { + /* Also left over threads performe this loop. + * The left over thread results will be ignored + */ + for(size_t i=0; i < 16; i++) + { + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + barrier(CLK_LOCAL_MEM_FENCE); + xin[get_local_id(1)][get_local_id(0)] = text; + barrier(CLK_LOCAL_MEM_FENCE); + text = mix_and_propagate(xin); + } + } +#endif +#if(COMP_MODE==1) + // do not use early return here + if(gIdx < Threads) +#endif + { vstore2(as_ulong2(text), get_local_id(1) + 4, states); } diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 8950105..ea057a0 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -83,15 +83,7 @@ private: constexpr size_t byteToMiB = 1024u * 1024u; - size_t hashMemSize; - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - } - else - { - hashMemSize = AEON_MEMORY; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); std::string conf; for(auto& ctx : devVec) @@ -118,7 +110,7 @@ private: maxThreads = 2024u; } // increase all intensity limits by two for aeon - if(!::jconf::inst()->IsCurrencyMonero()) + if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) maxThreads *= 2u; // keep 
128MiB memory free (value is randomly chosen) diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 8dfbce5..46a04d5 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -191,9 +191,20 @@ void minethd::work_main() uint64_t iCount = 0; cryptonight_ctx* cpu_ctx; cpu_ctx = cpu::minethd::minethd_alloc_ctx(); - cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyMonero()); + auto miner_algo = ::jconf::inst()->GetMiningAlgo(); + cn_hash_fun hash_fun; + if(miner_algo == cryptonight_monero || miner_algo == cryptonight_heavy) + { + // start with cryptonight and switch later if fork version is reached + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight); + } + else + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + globalStates::inst().iConsumeCnt++; + uint8_t version = 0; + while (bQuit == 0) { if (oWork.bStall) @@ -207,6 +218,16 @@ void minethd::work_main() std::this_thread::sleep_for(std::chrono::milliseconds(100)); consume_work(); + uint8_t new_version = oWork.getVersion(); + if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero); + } + else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy); + } + version = new_version; continue; } @@ -215,7 +236,8 @@ void minethd::work_main() assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); uint64_t target = oWork.iTarget; - XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target); + /// \todo add monero hard for version + XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, 
target, miner_algo, version); if(oWork.bNiceHash) pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39); @@ -231,7 +253,7 @@ void minethd::work_main() cl_uint results[0x100]; memset(results,0,sizeof(cl_uint)*(0x100)); - XMRRunJob(pGpuCtx, results); + XMRRunJob(pGpuCtx, results, miner_algo, version); for(size_t i = 0; i < results[0xFF]; i++) { @@ -258,6 +280,16 @@ void minethd::work_main() } consume_work(); + uint8_t new_version = oWork.getVersion(); + if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero); + } + else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy); + } + version = new_version; } } diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index ddeb89b..568abb5 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -28,16 +28,8 @@ public: autoAdjust() { - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - halfHashMemSize = hashMemSize / 2u; - } - else - { - hashMemSize = AEON_MEMORY; - halfHashMemSize = hashMemSize / 2u; - } + hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + halfHashMemSize = hashMemSize / 2u; } bool printConfig() diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h index 631c39a..5c9a733 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight.h +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -7,8 +7,6 @@ extern "C" { #include #include -#include "xmrstak/backend/cryptonight.hpp" - typedef struct { uint8_t hash_state[224]; // Need only 200, explicit align diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index e4ccbc3..85373e8 
100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -16,6 +16,7 @@ #pragma once #include "cryptonight.h" +#include "xmrstak/backend/cryptonight.hpp" #include #include @@ -148,7 +149,20 @@ static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i *x7 = soft_aesenc(*x7, key); } -template +inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) +{ + __m128i tmp0 = x0; + x0 = _mm_xor_si128(x0, x1); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_xor_si128(x2, x3); + x3 = _mm_xor_si128(x3, x4); + x4 = _mm_xor_si128(x4, x5); + x5 = _mm_xor_si128(x5, x6); + x6 = _mm_xor_si128(x6, x7); + x7 = _mm_xor_si128(x7, tmp0); +} + +template void cn_explode_scratchpad(const __m128i* input, __m128i* output) { // This is more than we have registers, compiler will assign 2 keys on the stack @@ -166,6 +180,40 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) xin6 = _mm_load_si128(input + 10); xin7 = _mm_load_si128(input + 11); + if(ALGO == cryptonight_heavy) + { + for(size_t i=0; i < 16; i++) + { + if(SOFT_AES) + { + soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + } + else + { + aes_round(k0, 
&xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + } + mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7); + } + } + for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { if(SOFT_AES) @@ -213,7 +261,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) } } -template +template void cn_implode_scratchpad(const __m128i* input, __m128i* output) { // This is more than we have registers, compiler will assign 2 keys on the stack @@ -275,6 +323,93 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); } + + if(ALGO == cryptonight_heavy) + mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); + } + + if(ALGO == cryptonight_heavy) + { + for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) + { + if(PREFETCH) + _mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA); + + xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); + xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); + xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); + xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); + + if(PREFETCH) + _mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA); + + xout4 = 
_mm_xor_si128(_mm_load_si128(input + i + 4), xout4); + xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); + xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); + xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); + + if(SOFT_AES) + { + soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + else + { + aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + + if(ALGO == cryptonight_heavy) + 
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); + } + + for(size_t i=0; i < 16; i++) + { + if(SOFT_AES) + { + soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + else + { + aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + + mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); + } } _mm_store_si128(output + 4, xout0); @@ -287,13 +422,45 @@ void cn_implode_scratchpad(const 
__m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } -template +inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = _mm_cvtsi128_si64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = _mm_cvtsi128_si64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + +template void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0) { + constexpr size_t MASK = cn_select_mask(); + constexpr size_t ITERATIONS = cn_select_iter(); + constexpr size_t MEM = cn_select_memory(); + + if(ALGO == cryptonight_monero && len < 43) + { + memset(output, 0, 32); + return; + } + keccak((const uint8_t *)input, len, ctx0->hash_state, 200); + uint64_t monero_const; + if(ALGO == cryptonight_monero) + { + monero_const = *reinterpret_cast(reinterpret_cast(input) + 35); + monero_const ^= *(reinterpret_cast(ctx0->hash_state) + 24); + } + // Optim - 99% time boundary - cn_explode_scratchpad((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state); + cn_explode_scratchpad((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state); uint8_t* l0 = ctx0->long_state; uint64_t* h0 = (uint64_t*)ctx0->hash_state; @@ -315,8 +482,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero) + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + else + _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + idx0 = _mm_cvtsi128_si64(cx); + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0); bx0 = cx; @@ -333,14 +505,28 @@ void cryptonight_hash(const void* input, size_t len, void* output, 
cryptonight_c if(PREFETCH) _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); ah0 += lo; - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; + + if(ALGO == cryptonight_monero) + ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ah0 ^= ch; idx0 = al0; + + if(ALGO == cryptonight_heavy) + { + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 = d ^ q; + } } // Optim - 90% time boundary - cn_implode_scratchpad((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state); + cn_implode_scratchpad((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state); // Optim - 99% time boundary @@ -351,15 +537,34 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c // This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon // to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output // We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons) -template +template void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) { + constexpr size_t MASK = cn_select_mask(); + constexpr size_t ITERATIONS = cn_select_iter(); + constexpr size_t MEM = cn_select_memory(); + + if(ALGO == cryptonight_monero && len < 43) + { + memset(output, 0, 64); + return; + } + keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200); keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200); + uint64_t monero_const_0, monero_const_1; + if(ALGO == cryptonight_monero) + { + monero_const_0 = *reinterpret_cast(reinterpret_cast(input) + 35); + monero_const_0 ^= *(reinterpret_cast(ctx[0]->hash_state) + 24); + monero_const_1 = *reinterpret_cast(reinterpret_cast(input) + len + 35); + monero_const_1 ^= *(reinterpret_cast(ctx[1]->hash_state) + 24); 
+ } + // Optim - 99% time boundary - cn_explode_scratchpad((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state); - cn_explode_scratchpad((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state); + cn_explode_scratchpad((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state); + cn_explode_scratchpad((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state); uint8_t* l0 = ctx[0]->long_state; uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; @@ -387,7 +592,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero) + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + else + _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + idx0 = _mm_cvtsi128_si64(cx); bx0 = cx; @@ -401,7 +610,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); - _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); + if(ALGO == cryptonight_monero) + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); + else + _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); + idx1 = _mm_cvtsi128_si64(cx); bx1 = cx; @@ -417,11 +630,26 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axl0 += hi; axh0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = axl0; - ((uint64_t*)&l0[idx0 & MASK])[1] = axh0; + + if(ALGO == cryptonight_monero) + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0; + axh0 ^= ch; axl0 ^= cl; idx0 = axl0; + if(ALGO == cryptonight_heavy) + { + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 
= d ^ q; + } + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0); @@ -433,18 +661,33 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axl1 += hi; axh1 += lo; ((uint64_t*)&l1[idx1 & MASK])[0] = axl1; - ((uint64_t*)&l1[idx1 & MASK])[1] = axh1; + + if(ALGO == cryptonight_monero) + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; + else + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1; + axh1 ^= ch; axl1 ^= cl; idx1 = axl1; + if(ALGO == cryptonight_heavy) + { + int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; + int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; + idx1 = d ^ q; + } + if(PREFETCH) _mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0); } // Optim - 90% time boundary - cn_implode_scratchpad((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state); - cn_implode_scratchpad((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state); + cn_implode_scratchpad((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state); + cn_implode_scratchpad((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state); // Optim - 99% time boundary @@ -455,12 +698,10 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto } #define CN_STEP1(a, b, c, l, ptr, idx) \ - a = _mm_xor_si128(a, c); \ - idx = _mm_cvtsi128_si64(a); \ ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ - c = _mm_load_si128(ptr) + c = _mm_load_si128(ptr); #define CN_STEP2(a, b, c, l, ptr, idx) \ if(SOFT_AES) \ @@ -468,30 +709,64 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else \ c = _mm_aesenc_si128(c, a); \ b = _mm_xor_si128(b, c); \ - _mm_store_si128(ptr, b) + if(ALGO == cryptonight_monero) \ + cryptonight_monero_tweak((uint64_t*)ptr, b); \ + else \ + _mm_store_si128(ptr, b);\ #define CN_STEP3(a, b, c, l, ptr, idx) \ idx = _mm_cvtsi128_si64(c); \ ptr = (__m128i *)&l[idx 
& MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ - b = _mm_load_si128(ptr) + b = _mm_load_si128(ptr); -#define CN_STEP4(a, b, c, l, ptr, idx) \ +#define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - _mm_store_si128(ptr, a) + if(ALGO == cryptonight_monero) \ + _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ + else \ + _mm_store_si128(ptr, a);\ + a = _mm_xor_si128(a, b); \ + idx = _mm_cvtsi128_si64(a); \ + if(ALGO == cryptonight_heavy) \ + { \ + int64_t n = ((int64_t*)&l[idx & MASK])[0]; \ + int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ + int64_t q = n / (d | 0x5); \ + ((int64_t*)&l[idx & MASK])[0] = n ^ q; \ + idx = d ^ q; \ + } + +#define CONST_INIT(ctx, n) \ + __m128i mc##n = _mm_set_epi64x(*reinterpret_cast(reinterpret_cast(input) + n * len + 35) ^ \ + *(reinterpret_cast((ctx)->hash_state) + 24), 0); // This lovelier creation will do 3 cn hashes at a time. -template +template void cryptonight_triple_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) { + constexpr size_t MASK = cn_select_mask(); + constexpr size_t ITERATIONS = cn_select_iter(); + constexpr size_t MEM = cn_select_memory(); + + if(ALGO == cryptonight_monero && len < 43) + { + memset(output, 0, 32 * 3); + return; + } + for (size_t i = 0; i < 3; i++) { keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); - cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); + cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); } + CONST_INIT(ctx[0], 0); + CONST_INIT(ctx[1], 1); + CONST_INIT(ctx[2], 2); + uint8_t* l0 = ctx[0]->long_state; uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; uint8_t* l1 = ctx[1]->long_state; @@ -509,9 +784,14 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto __m128i cx1 = _mm_set_epi64x(0, 0); __m128i cx2 = _mm_set_epi64x(0, 0); + uint64_t idx0, 
idx1, idx2; + idx0 = _mm_cvtsi128_si64(ax0); + idx1 = _mm_cvtsi128_si64(ax1); + idx2 = _mm_cvtsi128_si64(ax2); + for (size_t i = 0; i < ITERATIONS/2; i++) { - uint64_t idx0, idx1, idx2, hi, lo; + uint64_t hi, lo; __m128i *ptr0, *ptr1, *ptr2; // EVEN ROUND @@ -527,9 +807,9 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2); + CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); // ODD ROUND CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); @@ -544,29 +824,44 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1); CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2); + CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); } for (size_t i = 0; i < 3; i++) { - cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); + cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); keccakf((uint64_t*)ctx[i]->hash_state, 24); extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i); } } // This even lovelier creation will do 4 cn hashes at a time. 
-template +template void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) { + constexpr size_t MASK = cn_select_mask(); + constexpr size_t ITERATIONS = cn_select_iter(); + constexpr size_t MEM = cn_select_memory(); + + if(ALGO == cryptonight_monero && len < 43) + { + memset(output, 0, 32 * 4); + return; + } + for (size_t i = 0; i < 4; i++) { keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); - cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); + cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); } + CONST_INIT(ctx[0], 0); + CONST_INIT(ctx[1], 1); + CONST_INIT(ctx[2], 2); + CONST_INIT(ctx[3], 3); + uint8_t* l0 = ctx[0]->long_state; uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; uint8_t* l1 = ctx[1]->long_state; @@ -588,10 +883,16 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni __m128i cx1 = _mm_set_epi64x(0, 0); __m128i cx2 = _mm_set_epi64x(0, 0); __m128i cx3 = _mm_set_epi64x(0, 0); - + + uint64_t idx0, idx1, idx2, idx3; + idx0 = _mm_cvtsi128_si64(ax0); + idx1 = _mm_cvtsi128_si64(ax1); + idx2 = _mm_cvtsi128_si64(ax2); + idx3 = _mm_cvtsi128_si64(ax3); + for (size_t i = 0; i < ITERATIONS/2; i++) { - uint64_t idx0, idx1, idx2, idx3, hi, lo; + uint64_t hi, lo; __m128i *ptr0, *ptr1, *ptr2, *ptr3; // EVEN ROUND @@ -610,10 +911,10 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); - CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3); + CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); // ODD ROUND CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); @@ -631,30 
+932,46 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2); CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3); - CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3); + CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3); } for (size_t i = 0; i < 4; i++) { - cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); + cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); keccakf((uint64_t*)ctx[i]->hash_state, 24); extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i); } } // This most lovely creation will do 5 cn hashes at a time. -template +template void cryptonight_penta_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) { + constexpr size_t MASK = cn_select_mask(); + constexpr size_t ITERATIONS = cn_select_iter(); + constexpr size_t MEM = cn_select_memory(); + + if(ALGO == cryptonight_monero && len < 43) + { + memset(output, 0, 32 * 5); + return; + } + for (size_t i = 0; i < 5; i++) { keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); - cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); + cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); } + CONST_INIT(ctx[0], 0); + CONST_INIT(ctx[1], 1); + CONST_INIT(ctx[2], 2); + CONST_INIT(ctx[3], 3); + CONST_INIT(ctx[4], 4); + uint8_t* l0 = ctx[0]->long_state; uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; uint8_t* l1 = ctx[1]->long_state; @@ -682,9 +999,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton __m128i cx3 = _mm_set_epi64x(0, 0); __m128i cx4 = _mm_set_epi64x(0, 0); + uint64_t 
idx0, idx1, idx2, idx3, idx4; + idx0 = _mm_cvtsi128_si64(ax0); + idx1 = _mm_cvtsi128_si64(ax1); + idx2 = _mm_cvtsi128_si64(ax2); + idx3 = _mm_cvtsi128_si64(ax3); + idx4 = _mm_cvtsi128_si64(ax4); + for (size_t i = 0; i < ITERATIONS/2; i++) { - uint64_t idx0, idx1, idx2, idx3, idx4, hi, lo; + uint64_t hi, lo; __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; // EVEN ROUND @@ -706,11 +1030,11 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4); - CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3); - CN_STEP4(ax4, bx4, cx4, l4, ptr4, idx4); + CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4); // ODD ROUND CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); @@ -731,16 +1055,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3); CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4); - CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3); - CN_STEP4(ax4, cx4, bx4, l4, ptr4, idx4); + CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4); } for (size_t i = 0; i < 5; i++) { - cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); + cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); keccakf((uint64_t*)ctx[i]->hash_state, 24); extra_hashes[ctx[i]->hash_state[0] & 
3](ctx[i]->hash_state, 200, (char*)output + 32 * i); } diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index 1026b04..17fa24b 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -28,9 +28,9 @@ extern "C" #include "c_jh.h" #include "c_skein.h" } +#include "xmrstak/backend/cryptonight.hpp" #include "cryptonight.h" #include "cryptonight_aesni.h" -#include "xmrstak/backend/cryptonight.hpp" #include "xmrstak/misc/console.hpp" #include "xmrstak/jconf.hpp" #include @@ -202,15 +202,8 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) { - size_t hashMemSize; - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - } - else - { - hashMemSize = AEON_MEMORY; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096); if(use_fast_mem == 0) @@ -285,15 +278,8 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al void cryptonight_free_ctx(cryptonight_ctx* ctx) { - size_t hashMemSize; - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - } - else - { - hashMemSize = AEON_MEMORY; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); + if(ctx->ctx_info[0] != 0) { #ifdef _WIN32 diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index cef4f8e..e263aca 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -231,45 +231,44 @@ bool minethd::self_test() bool bResult = true; - bool mineMonero = ::jconf::inst()->IsCurrencyMonero(); - if(mineMonero) + if(::jconf::inst()->GetMiningAlgo() == cryptonight) { unsigned char out[32 * MAX_N]; cn_hash_fun hashf; cn_hash_fun_multi 
hashf_multi; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, mineMonero); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf("This is a test", 14, out, ctx[0]); bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, mineMonero); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight); hashf("This is a test", 14, out, ctx[0]); bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, mineMonero); + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx); bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, mineMonero); + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight); hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx); bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 
64) == 0; - hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, mineMonero); + hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx); bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0; - hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, mineMonero); + hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx); bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0; - hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, mineMonero); + hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); bResult &= memcmp(out, 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" @@ -277,6 +276,12 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; } + else if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite) + { + } + else if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero) + { + } for (int i = 0; i < MAX_N; i++) cryptonight_free_ctx(ctx[i]); @@ -340,48 +345,56 @@ void minethd::consume_work() globalStates::inst().inst().iConsumeCnt++; } -minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero) +minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo) { // We have two independent flag bits in the functions // therefore we will build a binary digit and select the // function as a two digit binary - // Digit order SOFT_AES, NO_PREFETCH, MINER_ALGO + + uint8_t algv; + switch(algo) + { + case cryptonight: + algv = 2; + break; + case cryptonight_lite: + algv = 1; + break; + case cryptonight_monero: + algv = 0; + break; + case cryptonight_heavy: + algv = 3; + break; + default: + algv = 2; + break; + } static const cn_hash_fun func_table[] = { - /* there will be 8 function entries if `CONF_NO_MONERO` and `CONF_NO_AEON` - * is not defined. If one is defined there will be 4 entries. 
- */ -#ifndef CONF_NO_MONERO - cryptonight_hash, - cryptonight_hash, - cryptonight_hash, - cryptonight_hash -#endif -#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO)) - // comma will be added only if Monero and Aeon is build - , -#endif -#ifndef CONF_NO_AEON - cryptonight_hash, - cryptonight_hash, - cryptonight_hash, - cryptonight_hash -#endif + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash }; - std::bitset<3> digit; - digit.set(0, !bNoPrefetch); - digit.set(1, !bHaveAes); - - // define aeon settings -#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO) - // ignore 3rd bit if only one currency is active - digit.set(2, 0); -#else - digit.set(2, !mineMonero); -#endif + std::bitset<2> digit; + digit.set(0, !bHaveAes); + digit.set(1, !bNoPrefetch); - return func_table[digit.to_ulong()]; + return func_table[ algv << 2 | digit.to_ulong() ]; } void minethd::work_main() @@ -401,7 +414,7 @@ void minethd::work_main() uint32_t* piNonce; job_result result; - hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()); + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()); ctx = minethd_alloc_ctx(); piHashVal = (uint64_t*)(result.bResult + 24); @@ -434,6 +447,22 @@ void minethd::work_main() if(oWork.bNiceHash) result.iNonce = *piNonce; + if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero) + { + if(oWork.bWorkBlob[0] >= 7) + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero); + else + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight); + } + + if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy) + { + 
if(oWork.bWorkBlob[0] >= 3) + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy); + else + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight); + } + while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { if ((iCount++ & 0xF) == 0) //Store stats every 16 hashes @@ -465,93 +494,105 @@ void minethd::work_main() cryptonight_free_ctx(ctx); } -minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero) +minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo) { // We have two independent flag bits in the functions // therefore we will build a binary digit and select the // function as a two digit binary - // Digit order SOFT_AES, NO_PREFETCH + + uint8_t algv; + switch(algo) + { + case cryptonight: + algv = 2; + break; + case cryptonight_lite: + algv = 1; + break; + case cryptonight_monero: + algv = 0; + break; + default: + algv = 2; + break; + } static const cn_hash_fun_multi func_table[] = { - /* there will be 8*(MAX_N-1) function entries if `CONF_NO_MONERO` and `CONF_NO_AEON` - * is not defined. If one is defined there will be 4*(MAX_N-1) entries. 
- */ -#ifndef CONF_NO_MONERO - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_penta_hash, - cryptonight_penta_hash, - cryptonight_penta_hash -#endif -#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO)) - // comma will be added only if Monero and Aeon is build - , -#endif -#ifndef CONF_NO_AEON - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_penta_hash, - cryptonight_penta_hash, - cryptonight_penta_hash -#endif + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + 
cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash }; std::bitset<2> digit; - digit.set(0, !bNoPrefetch); - digit.set(1, !bHaveAes); - - // define aeon settings -#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO) - // ignore miner algo if only one currency is active - size_t miner_algo_base = 0; -#else - size_t miner_algo_base = mineMonero ? 0 : 4*(MAX_N-1); -#endif - - N = (N<2) ? 2 : (N>MAX_N) ? MAX_N : N; - return func_table[miner_algo_base + 4*(N-2) + digit.to_ulong()]; + digit.set(0, !bHaveAes); + digit.set(1, !bNoPrefetch); + + return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()]; } void minethd::double_work_main() { - multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero())); + multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo())); } void minethd::triple_work_main() { - multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero())); + multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo())); } void minethd::quad_work_main() { - multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero())); + multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo())); } void minethd::penta_work_main() { - multiway_work_main<5>(func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero())); + multiway_work_main<5>(func_multi_selector(5, 
::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo())); } template @@ -621,6 +662,22 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi) if(oWork.bNiceHash) iNonce = *piNonce[0]; + if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero) + { + if(oWork.bWorkBlob[0] >= 7) + hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero); + else + hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight); + } + + if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy) + { + if(oWork.bWorkBlob[0] >= 3) + hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy); + else + hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight); + } + while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) { if ((iCount++ & 0x7) == 0) //Store stats every 8*N hashes diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 0433d0d..ef1bbd2 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -1,5 +1,6 @@ #pragma once +#include "xmrstak/jconf.hpp" #include "crypto/cryptonight.h" #include "xmrstak/backend/miner_work.hpp" #include "xmrstak/backend/iBackend.hpp" @@ -23,14 +24,14 @@ public: typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); - static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero); + static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo); static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id); static cryptonight_ctx* minethd_alloc_ctx(); private: typedef void (*cn_hash_fun_multi)(const void*, size_t, void*, cryptonight_ctx**); - static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero); + static 
cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo); minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity); diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 0ef5ae7..fe10a9f 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -1,12 +1,123 @@ #pragma once +#include +#include +#include + +enum xmrstak_algo +{ + invalid_algo = 0, + cryptonight = 1, + cryptonight_lite = 2, + cryptonight_monero = 3, + cryptonight_heavy = 4 +}; // define aeon settings -#define AEON_MEMORY 1048576llu -#define AEON_MASK 0xFFFF0 -#define AEON_ITER 0x40000 +constexpr size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024; +constexpr uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0; +constexpr uint32_t CRYPTONIGHT_LITE_ITER = 0x40000; + +constexpr size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024; +constexpr uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; +constexpr uint32_t CRYPTONIGHT_ITER = 0x80000; + +constexpr size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024; +constexpr uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0; +constexpr uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000; + +template +inline constexpr size_t cn_select_memory() { return 0; } + +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } + +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_LITE_MEMORY; } + +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } + +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_HEAVY_MEMORY; } + + +inline size_t cn_select_memory(xmrstak_algo algo) +{ + switch(algo) + { + case cryptonight: + return CRYPTONIGHT_MEMORY; + case cryptonight_lite: + return CRYPTONIGHT_LITE_MEMORY; + case cryptonight_monero: + return CRYPTONIGHT_MEMORY; + case cryptonight_heavy: + return CRYPTONIGHT_HEAVY_MEMORY; + default: + return 0; + } +} + +template +inline constexpr uint32_t 
cn_select_mask() { return 0; } + +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } + +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_LITE_MASK; } + +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } + +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_HEAVY_MASK; } + +inline size_t cn_select_mask(xmrstak_algo algo) +{ + switch(algo) + { + case cryptonight: + return CRYPTONIGHT_MASK; + case cryptonight_lite: + return CRYPTONIGHT_LITE_MASK; + case cryptonight_monero: + return CRYPTONIGHT_MASK; + case cryptonight_heavy: + return CRYPTONIGHT_HEAVY_MASK; + default: + return 0; + } +} + +template +inline constexpr uint32_t cn_select_iter() { return 0; } + +template<> +inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } + +template<> +inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } + +template<> +inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -// define xmr settings -#define MONERO_MEMORY 2097152llu -#define MONERO_MASK 0x1FFFF0 -#define MONERO_ITER 0x80000 +template<> +inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } +inline size_t cn_select_iter(xmrstak_algo algo) +{ + switch(algo) + { + case cryptonight: + return CRYPTONIGHT_ITER; + case cryptonight_lite: + return CRYPTONIGHT_LITE_ITER; + case cryptonight_monero: + return CRYPTONIGHT_ITER; + case cryptonight_heavy: + return CRYPTONIGHT_HEAVY_ITER; + default: + return 0; + } +} diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp index 4bfe429..9e5a4e4 100644 --- a/xmrstak/backend/miner_work.hpp +++ b/xmrstak/backend/miner_work.hpp @@ -74,5 +74,11 @@ namespace xmrstak return *this; } + + uint8_t getVersion() const + { + return bWorkBlob[0]; + } + }; } // namepsace xmrstak diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 
867a998..153e4e3 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -237,12 +237,20 @@ void minethd::work_main() uint64_t iCount = 0; cryptonight_ctx* cpu_ctx; cpu_ctx = cpu::minethd::minethd_alloc_ctx(); - cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyMonero()); + auto miner_algo = ::jconf::inst()->GetMiningAlgo(); + cn_hash_fun hash_fun; + if(miner_algo == cryptonight_monero || miner_algo == cryptonight_heavy) + { + // start with cryptonight and switch later if fork version is reached + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight); + } + else + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); uint32_t iNonce; globalStates::inst().iConsumeCnt++; - bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero"); + uint8_t version = 0; while (bQuit == 0) { @@ -257,6 +265,16 @@ void minethd::work_main() std::this_thread::sleep_for(std::chrono::milliseconds(100)); consume_work(); + uint8_t new_version = oWork.getVersion(); + if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero); + } + else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy); + } + version = new_version; continue; } @@ -281,11 +299,11 @@ void minethd::work_main() uint32_t foundNonce[10]; uint32_t foundCount; - cryptonight_extra_cpu_prepare(&ctx, iNonce); + cryptonight_extra_cpu_prepare(&ctx, iNonce, miner_algo, version); - cryptonight_core_cpu_hash(&ctx, mineMonero); + cryptonight_core_cpu_hash(&ctx, miner_algo, iNonce, version); - cryptonight_extra_cpu_final(&ctx, 
iNonce, oWork.iTarget, &foundCount, foundNonce); + cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce, miner_algo, version); for(size_t i = 0; i < foundCount; i++) { @@ -316,6 +334,16 @@ void minethd::work_main() } consume_work(); + uint8_t new_version = oWork.getVersion(); + if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero); + } + else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3) + { + hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy); + } + version = new_version; } } diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp index afbdbaf..29a3523 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -3,6 +3,9 @@ #include #include +#include "xmrstak/jconf.hpp" +#include "xmrstak/backend/cryptonight.hpp" + typedef struct { int device_id; const char *device_name; @@ -20,6 +23,7 @@ typedef struct { uint32_t *d_result_nonce; uint32_t *d_long_state; uint32_t *d_ctx_state; + uint32_t *d_ctx_state2; uint32_t *d_ctx_a; uint32_t *d_ctx_b; uint32_t *d_ctx_key1; @@ -41,8 +45,8 @@ int cuda_get_devicecount( int* deviceCount); int cuda_get_deviceinfo(nvid_ctx *ctx); int cryptonight_extra_cpu_init(nvid_ctx *ctx); void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, uint32_t len); -void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce); -void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce); +void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo, uint8_t version); +void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, 
uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo, uint8_t version); } -void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero); +void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce, uint8_t version); diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index cc97274..ede578f 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -6,6 +6,8 @@ #include #include +#include "xmrstak/jconf.hpp" + #ifdef _WIN32 #include extern "C" void compat_usleep(uint64_t waitTime) @@ -106,8 +108,18 @@ __device__ __forceinline__ void storeGlobal32( T* addr, T const & val ) #endif } -template -__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state, uint32_t * __restrict__ ctx_key1 ) +template< typename T > +__device__ __forceinline__ void storeGlobal64( T* addr, T const & val ) +{ +#if (__CUDA_ARCH__ < 700) + asm volatile( "st.global.cg.u64 [%0], %1;" : : "l"( addr ), "l"( val ) ); +#else + *addr = val; +#endif +} + +template +__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state2, uint32_t * __restrict__ ctx_key1 ) { __shared__ uint32_t sharedMemory[1024]; @@ -117,7 +129,7 @@ __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int parti const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3; const int sub = ( threadIdx.x & 7 ) << 2; - const int batchsize = ITERATIONS >> bfactor; + const int batchsize = MEMORY >> bfactor; const int start = partidx * batchsize; const int end = start + batchsize; @@ -131,18 +143,18 @@ __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int parti if( partidx == 0 ) { // first round - MEMCPY8( text, ctx_state + thread * 50 + sub + 16, 2 ); + 
MEMCPY8( text, ctx_state2 + thread * 50 + sub + 16, 2 ); } else { // load previous text data - MEMCPY8( text, &long_state[( (uint64_t) thread << THREAD_SHIFT ) + sub + start - 32], 2 ); + MEMCPY8( text, &long_state[( (uint64_t) thread * MEMORY ) + sub + start - 32], 2 ); } __syncthreads( ); for ( int i = start; i < end; i += 32 ) { cn_aes_pseudo_round_mut( sharedMemory, text, key ); - MEMCPY8(&long_state[((uint64_t) thread << THREAD_SHIFT) + (sub + i)], text, 2); + MEMCPY8(&long_state[((uint64_t) thread * MEMORY) + (sub + i)], text, 2); } } @@ -157,33 +169,37 @@ __forceinline__ __device__ void unusedVar( const T& ) * - this method can be used with all compute architectures * - for =sm_30 - * @param sub thread number within the group, range [0;4) + * @param sub thread number within the group, range [0:group_n] * @param value value to share with other threads within the group - * @param src thread number within the group from where the data is read, range [0;4) + * @param src thread number within the group from where the data is read, range [0:group_n] */ +template __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_t sub,const int val,const uint32_t src) { #if( __CUDA_ARCH__ < 300 ) ptr[sub] = val; - return ptr[src&3]; + return ptr[src & (group_n-1)]; #else unusedVar( ptr ); unusedVar( sub ); # if(__CUDACC_VER_MAJOR__ >= 9) - return __shfl_sync(0xFFFFFFFF, val, src, 4 ); + return __shfl_sync(0xFFFFFFFF, val, src, group_n ); # else - return __shfl( val, src, 4 ); + return __shfl( val, src, group_n ); # endif #endif } -template +template #ifdef XMR_STAK_THREADS __launch_bounds__( XMR_STAK_THREADS * 4 ) #endif -__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b ) +__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state, + uint32_t 
startNonce, uint32_t * __restrict__ d_input ) { __shared__ uint32_t sharedMemory[1024]; @@ -192,6 +208,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti __syncthreads( ); const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 2; + const uint32_t nonce = startNonce + thread; const int sub = threadIdx.x & 3; const int sub2 = sub & 2; @@ -205,30 +222,48 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti return; int i, k; - uint32_t j; + uint32_t j; const int batchsize = (ITERATIONS * 2) >> ( 2 + bfactor ); const int start = partidx * batchsize; const int end = start + batchsize; - uint32_t * long_state = &d_long_state[(IndexType) thread << THREAD_SHIFT]; - uint32_t * ctx_a = d_ctx_a + thread * 4; - uint32_t * ctx_b = d_ctx_b + thread * 4; - uint32_t a, d[2]; + uint32_t * long_state = &d_long_state[(IndexType) thread * MEMORY]; + uint32_t a, d[2], idx0; uint32_t t1[2], t2[2], res; - a = ctx_a[sub]; - d[1] = ctx_b[sub]; + uint32_t tweak1_2[2]; + if (ALGO == cryptonight_monero) + { + uint32_t * state = d_ctx_state + thread * 50; + tweak1_2[0] = (d_input[8] >> 24) | (d_input[9] << 8); + tweak1_2[0] ^= state[48]; + tweak1_2[1] = nonce; + tweak1_2[1] ^= state[49]; + } + + a = (d_ctx_a + thread * 4)[sub]; + idx0 = shuffle<4>(sPtr,sub, a, 0); + if(ALGO == cryptonight_heavy) + { + if(partidx != 0) + { + // state is stored after all ctx_b states + idx0 = *(d_ctx_b + threads * 4 + thread); + } + } + d[1] = (d_ctx_b + thread * 4)[sub]; + #pragma unroll 2 for ( i = start; i < end; ++i ) { #pragma unroll 2 for ( int x = 0; x < 2; ++x ) { - j = ( ( shuffle(sPtr,sub, a, 0) & MASK ) >> 2 ) + sub; + j = ( ( idx0 & MASK ) >> 2 ) + sub; const uint32_t x_0 = loadGlobal32( long_state + j ); - const uint32_t x_1 = shuffle(sPtr,sub, x_0, sub + 1); - const uint32_t x_2 = shuffle(sPtr,sub, x_0, sub + 2); - const uint32_t x_3 = shuffle(sPtr,sub, x_0, sub + 3); + const uint32_t x_1 = shuffle<4>(sPtr,sub, x_0, sub + 1); 
+ const uint32_t x_2 = shuffle<4>(sPtr,sub, x_0, sub + 2); + const uint32_t x_3 = shuffle<4>(sPtr,sub, x_0, sub + 3); d[x] = a ^ t_fn0( x_0 & 0xff ) ^ t_fn1( (x_1 >> 8) & 0xff ) ^ @@ -237,41 +272,74 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti //XOR_BLOCKS_DST(c, b, &long_state[j]); - t1[0] = shuffle(sPtr,sub, d[x], 0); - //long_state[j] = d[0] ^ d[1]; - storeGlobal32( long_state + j, d[0] ^ d[1] ); - + t1[0] = shuffle<4>(sPtr,sub, d[x], 0); + + const uint32_t z = d[0] ^ d[1]; + if(ALGO == cryptonight_monero) + { + const uint32_t table = 0x75310U; + const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2); + const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24; + storeGlobal32( long_state + j, sub == 2 ? fork_7 : z ); + } + else + storeGlobal32( long_state + j, z ); + //MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & MASK]); j = ( ( *t1 & MASK ) >> 2 ) + sub; uint32_t yy[2]; *( (uint64_t*) yy ) = loadGlobal64( ( (uint64_t *) long_state )+( j >> 1 ) ); uint32_t zz[2]; - zz[0] = shuffle(sPtr,sub, yy[0], 0); - zz[1] = shuffle(sPtr,sub, yy[1], 0); + zz[0] = shuffle<4>(sPtr,sub, yy[0], 0); + zz[1] = shuffle<4>(sPtr,sub, yy[1], 0); - t1[1] = shuffle(sPtr,sub, d[x], 1); + t1[1] = shuffle<4>(sPtr,sub, d[x], 1); #pragma unroll for ( k = 0; k < 2; k++ ) - t2[k] = shuffle(sPtr,sub, a, k + sub2); + t2[k] = shuffle<4>(sPtr,sub, a, k + sub2); *( (uint64_t *) t2 ) += sub2 ? ( *( (uint64_t *) t1 ) * *( (uint64_t*) zz ) ) : __umul64hi( *( (uint64_t *) t1 ), *( (uint64_t*) zz ) ); res = *( (uint64_t *) t2 ) >> ( sub & 1 ? 32 : 0 ); - storeGlobal32( long_state + j, res ); + + if(ALGO == cryptonight_monero) + { + const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res; + const uint32_t long_state_update = sub2 ? tweaked_res : res; + storeGlobal32( long_state + j, long_state_update ); + } + else + storeGlobal32( long_state + j, res ); + a = ( sub & 1 ? 
yy[1] : yy[0] ) ^ res; + idx0 = shuffle<4>(sPtr,sub, a, 0); + if(ALGO == cryptonight_heavy) + { + int64_t n = loadGlobal64( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3)); + int32_t d = loadGlobal32( (uint32_t*)(( (uint64_t *) long_state ) + (( idx0 & MASK) >> 3) + 1u )); + int64_t q = n / (d | 0x5); + + if(sub&1) + storeGlobal64( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3), n ^ q ); + + idx0 = d ^ q; + } } } if ( bfactor > 0 ) { - ctx_a[sub] = a; - ctx_b[sub] = d[1]; + (d_ctx_a + thread * 4)[sub] = a; + (d_ctx_b + thread * 4)[sub] = d[1]; + if(ALGO == cryptonight_heavy) + if(sub&1) + *(d_ctx_b + threads * 4 + thread) = idx0; } } -template +template __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 ) { __shared__ uint32_t sharedMemory[1024]; @@ -280,9 +348,10 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti __syncthreads( ); int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3; - int sub = ( threadIdx.x & 7 ) << 2; + int subv = ( threadIdx.x & 7 ); + int sub = subv << 2; - const int batchsize = ITERATIONS >> bfactor; + const int batchsize = MEMORY >> bfactor; const int start = partidx * batchsize; const int end = start + batchsize; @@ -294,20 +363,53 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti MEMCPY8( text, d_ctx_state + thread * 50 + sub + 16, 2 ); __syncthreads( ); + +#if( __CUDA_ARCH__ < 300 ) + extern __shared__ uint32_t shuffleMem[]; + volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC)); +#else + volatile uint32_t* sPtr = NULL; +#endif + for ( int i = start; i < end; i += 32 ) { #pragma unroll for ( int j = 0; j < 4; ++j ) - text[j] ^= long_state[((IndexType) thread << THREAD_SHIFT) + (sub + i + j)]; + text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)]; 
cn_aes_pseudo_round_mut( sharedMemory, text, key ); + + if(ALGO == cryptonight_heavy) + { +#pragma unroll + for ( int j = 0; j < 4; ++j ) + text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7); + } + } + + if(ALGO == cryptonight_heavy) + { + __syncthreads( ); + + for ( int i = start; i < end; i += 32 ) + { +#pragma unroll + for ( int j = 0; j < 4; ++j ) + text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)]; + + cn_aes_pseudo_round_mut( sharedMemory, text, key ); + +#pragma unroll + for ( int j = 0; j < 4; ++j ) + text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7); + } } MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 ); } -template -void cryptonight_core_gpu_hash(nvid_ctx* ctx) +template +void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) { dim3 grid( ctx->device_blocks ); dim3 block( ctx->device_threads ); @@ -329,9 +431,11 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx) for ( int i = 0; i < partcountOneThree; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, - ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key1 )); + ctx->d_long_state, + (ALGO == cryptonight_heavy ? 
ctx->d_ctx_state2 : ctx->d_ctx_state), + ctx->d_ctx_key1 )); if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); } @@ -342,7 +446,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx) CUDA_CHECK_MSG_KERNEL( ctx->device_id, "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**", - cryptonight_core_gpu_phase2<<< + cryptonight_core_gpu_phase2<<< grid, block4, block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) @@ -352,7 +456,10 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx) i, ctx->d_long_state, ctx->d_ctx_a, - ctx->d_ctx_b + ctx->d_ctx_b, + ctx->d_ctx_state, + nonce, + ctx->d_input ) ); @@ -361,25 +468,39 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx) for ( int i = 0; i < partcountOneThree; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key2 )); } } -void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero) +void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce, uint8_t version) { -#ifndef CONF_NO_MONERO - if(mineMonero) + + if(miner_algo == cryptonight_monero) { - cryptonight_core_gpu_hash(ctx); + if(version >= 7) + cryptonight_core_gpu_hash(ctx, startNonce); + else + cryptonight_core_gpu_hash(ctx, startNonce); } -#endif -#ifndef CONF_NO_AEON - if(!mineMonero) + else if(miner_algo == cryptonight_heavy) { - cryptonight_core_gpu_hash(ctx); + if(version >= 3) + cryptonight_core_gpu_hash(ctx, startNonce); + else + { + cryptonight_core_gpu_hash(ctx, startNonce); + } } -#endif + else if(miner_algo == cryptonight) + { + cryptonight_core_gpu_hash(ctx, startNonce); + } + else if(miner_algo == cryptonight_lite) + { + 
cryptonight_core_gpu_hash(ctx, startNonce); + } + } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 92259db..2f08a1a 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -28,6 +28,7 @@ uint64_t keccakf_rndc[24] ={ typedef unsigned char BitSequence; typedef unsigned long long DataLength; +#include "xmrstak/backend/cryptonight.hpp" #include "cryptonight.hpp" #include "cuda_extra.hpp" #include "cuda_keccak.hpp" @@ -36,6 +37,7 @@ typedef unsigned long long DataLength; #include "cuda_jh.hpp" #include "cuda_skein.hpp" #include "cuda_device.hpp" +#include "cuda_aes.hpp" __constant__ uint8_t d_sub_byte[16][16] ={ {0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 }, @@ -90,10 +92,33 @@ __device__ __forceinline__ void cryptonight_aes_set_key( uint32_t * __restrict__ } } -__global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2 ) +__device__ __forceinline__ void mix_and_propagate( uint32_t* state ) +{ + uint32_t tmp0[4]; + for(size_t x = 0; x < 4; ++x) + tmp0[x] = (state)[x]; + + // set destination [0,6] + for(size_t t = 0; t < 7; ++t) + for(size_t x = 0; x < 4; ++x) + (state + 4 * t)[x] = (state + 4 * t)[x] ^ (state + 4 * (t + 1))[x]; + + // set destination 7 + for(size_t x = 0; x < 4; ++x) + (state + 4 * 7)[x] = (state + 4 * 7)[x] ^ tmp0[x]; +} + +template +__global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_state2, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, 
uint32_t * __restrict__ d_ctx_key2 ) { int thread = ( blockDim.x * blockIdx.x + threadIdx.x ); + __shared__ uint32_t sharedMemory[1024]; + if(ALGO == cryptonight_heavy) + { + cn_aes_gpu_init( sharedMemory ); + __syncthreads( ); + } if ( thread >= threads ) return; @@ -113,20 +138,45 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric cn_keccak( (uint8_t *) input, len, (uint8_t *) ctx_state ); cryptonight_aes_set_key( ctx_key1, ctx_state ); cryptonight_aes_set_key( ctx_key2, ctx_state + 8 ); + XOR_BLOCKS_DST( ctx_state, ctx_state + 8, ctx_a ); XOR_BLOCKS_DST( ctx_state + 4, ctx_state + 12, ctx_b ); - - memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 ); memcpy( d_ctx_a + thread * 4, ctx_a, 4 * 4 ); memcpy( d_ctx_b + thread * 4, ctx_b, 4 * 4 ); + memcpy( d_ctx_key1 + thread * 40, ctx_key1, 40 * 4 ); memcpy( d_ctx_key2 + thread * 40, ctx_key2, 40 * 4 ); + memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 ); + + if(ALGO == cryptonight_heavy) + { + + for(int i=0; i < 16; i++) + { + for(size_t t = 4; t < 12; ++t) + { + cn_aes_pseudo_round_mut( sharedMemory, ctx_state + 4u * t, ctx_key1 ); + } + // scipt first 4 * 128bit blocks = 4 * 4 uint32_t values + mix_and_propagate(ctx_state + 4 * 4); + } + // double buffer to move manipulated state into phase1 + memcpy( d_ctx_state2 + thread * 50, ctx_state, 50 * 4 ); + } } -__global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state ) +template +__global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state,uint32_t * __restrict__ d_ctx_key2 ) { const int thread = blockDim.x * blockIdx.x + threadIdx.x; + __shared__ uint32_t sharedMemory[1024]; + + if(ALGO == cryptonight_heavy) + { + cn_aes_gpu_init( sharedMemory ); + __syncthreads( ); + } if ( thread 
>= threads ) return; @@ -134,11 +184,28 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3 uint32_t * __restrict__ ctx_state = d_ctx_state + thread * 50; uint64_t hash[4]; uint32_t state[50]; - -#pragma unroll + + #pragma unroll for ( i = 0; i < 50; i++ ) state[i] = ctx_state[i]; + if(ALGO == cryptonight_heavy) + { + uint32_t key[40]; + + // load keys + MEMCPY8( key, d_ctx_key2 + thread * 40, 20 ); + + for(int i=0; i < 16; i++) + { + for(size_t t = 4; t < 12; ++t) + { + cn_aes_pseudo_round_mut( sharedMemory, state + 4u * t, key ); + } + // scipt first 4 * 128bit blocks = 4 * 4 uint32_t values + mix_and_propagate(state + 4 * 4); + } + } cn_keccakf2( (uint64_t *) state ); switch ( ( (uint8_t *) state )[0] & 0x03 ) @@ -212,23 +279,26 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) if(gpuArch < 70) CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); - size_t hashMemSize; - if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - } - else - { - hashMemSize = AEON_MEMORY; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); + size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize; + if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo()) + { + // extent ctx_b to hold the state of idx0 + ctx_b_size += sizeof(uint32_t) * wsize; + // create a double buffer for the state to exchange the mixed state to phase1 + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state2, 50 * sizeof(uint32_t) * wsize)); + } + else + ctx->d_ctx_state2 = ctx->d_ctx_state; + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize)); 
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize)); - CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, 4 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, ctx_b_size)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); @@ -239,7 +309,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) return 1; } -extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce) +extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo, uint8_t version) { int threadsperblock = 128; uint32_t wsize = ctx->device_blocks * ctx->device_threads; @@ -247,11 +317,22 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce dim3 grid( ( wsize + threadsperblock - 1 ) / threadsperblock ); dim3 block( threadsperblock ); - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, - ctx->d_ctx_state, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + if(miner_algo == cryptonight_heavy && version >= 3) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } + else + { + /* pass two times d_ctx_state because the second state is used later in phase1, + * the first is used than in phase3 + */ + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state, ctx->d_ctx_state, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } } -extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t 
startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce) +extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo, uint8_t version) { int threadsperblock = 128; uint32_t wsize = ctx->device_blocks * ctx->device_threads; @@ -262,11 +343,23 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) )); - CUDA_CHECK_MSG_KERNEL( - ctx->device_id, - "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**", - cryptonight_extra_gpu_final<<>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state ) - ); + if(miner_algo == cryptonight_heavy && version >= 3) + { + CUDA_CHECK_MSG_KERNEL( + ctx->device_id, + "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**", + cryptonight_extra_gpu_final<<>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) + ); + } + else + { + // fallback for all other algorithms + CUDA_CHECK_MSG_KERNEL( + ctx->device_id, + "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**", + cryptonight_extra_gpu_final<<>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) + ); + } CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost )); CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost )); @@ -482,15 +575,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) ctx->total_device_memory = totalMemory; ctx->free_device_memory = freeMemory; - size_t hashMemSize; - 
if(::jconf::inst()->IsCurrencyMonero()) - { - hashMemSize = MONERO_MEMORY; - } - else - { - hashMemSize = AEON_MEMORY; - } + size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo()); #ifdef WIN32 /* We use in windows bfactor (split slow kernel into smaller parts) to avoid @@ -520,6 +605,9 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) // up to 16kibyte extra memory is used per thread for some kernel (lmem/local memory) // 680bytes are extra meta data memory per hash size_t perThread = hashMemSize + 16192u + 680u; + if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo()) + perThread += 50 * 4; // state double buffer + size_t max_intensity = limitedMemory / perThread; ctx->device_threads = max_intensity / ctx->device_blocks; // use only odd number of threads diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 9053844..e2d50f2 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -55,7 +55,7 @@ # include "xmrstak/misc/uac.hpp" #endif // _WIN32 -void do_benchmark(); +int do_benchmark(int block_version); void help() { @@ -64,47 +64,50 @@ void help() cout<<"Usage: "<> pool; std::string userName; @@ -149,7 +149,6 @@ std::string get_multipool_entry(bool& final) getline(std::cin, passwd); std::string rigid; - std::cin.clear(); std::cin.ignore(INT_MAX,'\n'); std::cout<<"- Rig identifier for pool-side statistics (needs pool support). Can be empty:"<> tmp; - std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::tolower); } currency = tmp; } - auto& http_port = params::inst().httpd_port; - if(http_port == params::httpd_port_unset) - { -#if defined(CONF_NO_HTTPD) - http_port = params::httpd_port_disabled; -#else - std::cout<<"- Do you want to use the HTTP interface?" <> port) || port < 0 || port > 65535) - { - std::cin.clear(); - std::cin.ignore(INT_MAX, '\n'); - std::cout << "Invalid port number. Please enter a number between 0 and 65535." 
<< std::endl; - } - - http_port = port; -#endif - } - auto& pool = params::inst().poolURL; bool userSetPool = true; if(pool.empty()) @@ -247,10 +221,7 @@ void do_guided_config() prompt_once(prompted); userSetPool = false; - if(currency == "monero") - std::cout<<"- Pool address: e.g. pool.usxmrpool.com:3333"<> pool; } @@ -263,6 +234,7 @@ void do_guided_config() std::cin >> userName; } + bool stdin_flushed = false; auto& passwd = params::inst().poolPasswd; if(passwd.empty() && !params::inst().userSetPwd) { @@ -270,6 +242,8 @@ void do_guided_config() // clear everything from stdin to allow an empty password std::cin.clear(); std::cin.ignore(INT_MAX,'\n'); + stdin_flushed = true; + std::cout<<"- Password (mostly empty or x):"<> port) || port < 0 || port > 65535) + { + std::cin.clear(); + std::cin.ignore(INT_MAX, '\n'); + std::cout << "Invalid port number. Please enter a number between 0 and 65535." << std::endl; + } + + http_port = port; +#endif + } + configTpl.replace("HTTP_PORT", std::to_string(http_port)); configTpl.write(params::inst().configFile); std::cout<<"Configuration stored in file '"<=argc ) + { + printer::inst()->print_msg(L0, "No argument for parameter '-C/--poolconf' given"); + win_exit(); + return 1; + } + params::inst().configFilePools = argv[i]; + } else if(opName.compare("-i") == 0 || opName.compare("--httpd") == 0) { ++i; @@ -600,6 +630,25 @@ int main(int argc, char *argv[]) { params::inst().allowUAC = false; } + else if(opName.compare("--benchmark") == 0) + { + ++i; + if( i >= argc ) + { + printer::inst()->print_msg(L0, "No argument for parameter '--benchmark' given"); + win_exit(); + return 1; + } + char* block_version = nullptr; + long int bversion = strtol(argv[i], &block_version, 10); + + if(bversion < 0 || bversion >= 256) + { + printer::inst()->print_msg(L0, "Benchmark block version must be in the range [0,255]"); + return 1; + } + params::inst().benchmark_block_version = bversion; + } else { printer::inst()->print_msg(L0, "Parameter unknown 
'%s'",argv[i]); @@ -612,7 +661,10 @@ int main(int argc, char *argv[]) if(!configEditor::file_exist(params::inst().configFile)) do_guided_config(); - if(!jconf::inst()->parse_config(params::inst().configFile.c_str())) + if(!configEditor::file_exist(params::inst().configFilePools)) + do_guided_pool_config(); + + if(!jconf::inst()->parse_config(params::inst().configFile.c_str(), params::inst().configFilePools.c_str())) { win_exit(); return 1; @@ -670,11 +722,14 @@ int main(int argc, char *argv[]) printer::inst()->print_str("'r' - results\n"); printer::inst()->print_str("'c' - connection\n"); printer::inst()->print_str("-------------------------------------------------------------------\n"); - if(::jconf::inst()->IsCurrencyMonero()) - printer::inst()->print_msg(L0,"Start mining: MONERO"); - else - printer::inst()->print_msg(L0,"Start mining: AEON"); + printer::inst()->print_msg(L0, "Mining coin: %s", jconf::inst()->GetMiningCoin().c_str()); + if(params::inst().benchmark_block_version >= 0) + { + printer::inst()->print_str("!!!! Doing only a benchmark and exiting. To mine, remove the '--benchmark' option. 
!!!!\n"); + return do_benchmark(params::inst().benchmark_block_version); + } + executor::inst()->ex_start(jconf::inst()->DaemonMode()); uint64_t lastTime = get_timestamp_ms(); @@ -709,23 +764,31 @@ int main(int argc, char *argv[]) return 0; } -void do_benchmark() +int do_benchmark(int block_version) { using namespace std::chrono; std::vector* pvThreads; - printer::inst()->print_msg(L0, "Running a 60 second benchmark..."); + printer::inst()->print_msg(L0, "Prepare benchmark for block version %d", block_version); + + uint8_t work[112]; + memset(work,0,112); + work[0] = static_cast(block_version); - uint8_t work[76] = {0}; - xmrstak::miner_work oWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0); + xmrstak::pool_data dat; + + xmrstak::miner_work oWork = xmrstak::miner_work(); pvThreads = xmrstak::BackendConnector::thread_starter(oWork); + printer::inst()->print_msg(L0, "Wait 30 sec until all backends are initialized"); + std::this_thread::sleep_for(std::chrono::seconds(30)); + + xmrstak::miner_work benchWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0); + printer::inst()->print_msg(L0, "Start a 60 second benchmark..."); + xmrstak::globalStates::inst().switch_work(benchWork, dat); uint64_t iStartStamp = get_timestamp_ms(); std::this_thread::sleep_for(std::chrono::seconds(60)); - - oWork = xmrstak::miner_work(); - xmrstak::pool_data dat; xmrstak::globalStates::inst().switch_work(oWork, dat); double fTotalHps = 0.0; @@ -734,9 +797,13 @@ void do_benchmark() double fHps = pvThreads->at(i)->iHashCount; fHps /= (pvThreads->at(i)->iTimestamp - iStartStamp) / 1000.0; - printer::inst()->print_msg(L0, "Thread %u: %.1f H/S", i, fHps); + auto bType = static_cast(pvThreads->at(i)->backendType); + std::string name(xmrstak::iBackend::getName(bType)); + + printer::inst()->print_msg(L0, "Benchmark Thread %u %s: %.1f H/S", i,name.c_str(), fHps); fTotalHps += fHps; } - printer::inst()->print_msg(L0, "Total: %.1f H/S", fTotalHps); + 
printer::inst()->print_msg(L0, "Benchmark Total: %.1f H/S", fTotalHps); + return 0; } diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl index 451ea7b..c95d142 100644 --- a/xmrstak/config.tpl +++ b/xmrstak/config.tpl @@ -1,28 +1,5 @@ R"===( /* - * pool_address - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported. - * wallet_address - Your wallet, or pool login. - * rig_id - Rig identifier for pool-side statistics (needs pool support). - * pool_password - Can be empty in most cases or "x". - * use_nicehash - Limit the nonce to 3 bytes as required by nicehash. - * use_tls - This option will make us connect using Transport Layer Security. - * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it. - * pool_weight - Pool weight is a number telling the miner how important the pool is. Miner will mine mostly at the pool - * with the highest weight, unless the pool fails. Weight must be an integer larger than 0. - * - * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com - */ -"pool_list" : -[ -POOLCONF], - -/* - * currency to mine - * allowed values: 'monero' or 'aeon' - */ -"currency" : "CURRENCY", - -/* * Network timeouts. * Because of the way this client is written it doesn't need to constantly talk (keep-alive) to the server to make * sure it is there. We detect a buggy / overloaded server by the call timeout. 
The default values will be ok for diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index c9d3a20..225fbe0 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -51,7 +51,7 @@ using namespace rapidjson; * This enum needs to match index in oConfigValues, otherwise we will get a runtime error */ enum configEnum { - aPoolList, bTlsSecureAlgo, sCurrency, iCallTimeout, iNetRetry, iGiveUpLimit, iVerboseLevel, bPrintMotd, iAutohashTime, + aPoolList, sCurrency, bTlsSecureAlgo, iCallTimeout, iNetRetry, iGiveUpLimit, iVerboseLevel, bPrintMotd, iAutohashTime, bFlushStdout, bDaemonMode, sOutputFile, iHttpdPort, sHttpLogin, sHttpPass, bPreferIpv4, bAesOverride, sUseSlowMem }; @@ -65,8 +65,8 @@ struct configVal { // kNullType means any type configVal oConfigValues[] = { { aPoolList, "pool_list", kArrayType }, - { bTlsSecureAlgo, "tls_secure_algo", kTrueType }, { sCurrency, "currency", kStringType }, + { bTlsSecureAlgo, "tls_secure_algo", kTrueType }, { iCallTimeout, "call_timeout", kNumberType }, { iNetRetry, "retry_time", kNumberType }, { iGiveUpLimit, "giveup_limit", kNumberType }, @@ -86,6 +86,28 @@ configVal oConfigValues[] = { constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); +struct xmrstak_coin_algo +{ + const char* coin_name; + xmrstak_algo algo; + const char* default_pool; +}; + +xmrstak_coin_algo coin_algos[] = { + { "aeon", cryptonight_lite, "mine.aeon-pool.com:5555" }, + { "cryptonight", cryptonight, nullptr }, + { "cryptonight_lite", cryptonight_lite, nullptr }, + { "edollar", cryptonight, nullptr }, + { "electroneum", cryptonight, nullptr }, + { "graft", cryptonight, nullptr }, + { "intense", cryptonight, nullptr }, + { "karbo", cryptonight, nullptr }, + { "monero2", cryptonight_monero, "pool.usxmrpool.com:3333" }, + { "sumokoin", cryptonight_heavy, nullptr } +}; + +constexpr size_t coin_alogo_size = (sizeof(coin_algos)/sizeof(coin_algos[0])); + inline bool checkType(Type have, Type want) { if(want == have) @@ -103,6 +125,7 @@ 
inline bool checkType(Type have, Type want) struct jconf::opaque_private { Document jsonDoc; + Document jsonDocPools; const Value* configValues[iConfigCnt]; //Compile time constant opaque_private() @@ -168,45 +191,6 @@ bool jconf::TlsSecureAlgos() return prv->configValues[bTlsSecureAlgo]->GetBool(); } -const std::string jconf::GetCurrency() -{ - auto& currency = xmrstak::params::inst().currency; - if(currency.empty()) - currency = prv->configValues[sCurrency]->GetString(); - if( -#ifndef CONF_NO_MONERO - // if monero is disabled at compile time, enable error message if selected currency is `monero` - !xmrstak::strcmp_i(currency, "monero") -#else - true -#endif - && -#ifndef CONF_NO_AEON - // if aeon is disabled at compile time, enable error message if selected currency is `aeon` - !xmrstak::strcmp_i(currency, "aeon") -#else - true -#endif - ) - { - printer::inst()->print_msg(L0, "ERROR: Wrong currency selected - '%s'.", currency.c_str()); - win_exit(); - } - return currency; -} - -bool jconf::IsCurrencyMonero() -{ - if(xmrstak::strcmp_i(GetCurrency(), "monero")) - { - return true; - } - else - { - return false; - } -} - bool jconf::PreferIpv4() { return prv->configValues[bPreferIpv4]->GetBool(); @@ -312,18 +296,68 @@ jconf::slow_mem_cfg jconf::GetSlowMemSetting() return unknown_value; } -bool jconf::parse_config(const char* sFilename) +std::string jconf::GetMiningCoin() { - FILE * pFile; - char * buffer; - size_t flen; + if(xmrstak::params::inst().currency.length() > 0) + return xmrstak::params::inst().currency; + else + return prv->configValues[sCurrency]->GetString(); +} - if(!check_cpu_features()) +void jconf::GetAlgoList(std::string& list) +{ + list.reserve(256); + for(size_t i=0; i < coin_alogo_size; i++) { - printer::inst()->print_msg(L0, "CPU support of SSE2 is required."); + list += "\t- "; + list += coin_algos[i].coin_name; + list += "\n"; + } +} + +bool jconf::IsOnAlgoList(std::string& needle) +{ + std::transform(needle.begin(), needle.end(), 
needle.begin(), ::tolower); + + if(needle == "monero") + { + printer::inst()->print_msg(L0, "You entered Monero as coin name. Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n" + "Please use monero2 if you want to mine Monero, or name the coin that you want to mine."); return false; } + for(size_t i=0; i < coin_alogo_size; i++) + { + if(needle == coin_algos[i].coin_name) + return true; + } + return false; +} + +const char* jconf::GetDefaultPool(const char* needle) +{ + const char* default_example = "pool.example.com:3333"; + + for(size_t i=0; i < coin_alogo_size; i++) + { + if(strcmp(needle, coin_algos[i].coin_name) == 0) + { + if(coin_algos[i].default_pool != nullptr) + return coin_algos[i].default_pool; + else + return default_example; + } + } + + return default_example; +} + +bool jconf::parse_file(const char* sFilename, bool main_conf) +{ + FILE * pFile; + char * buffer; + size_t flen; + pFile = fopen(sFilename, "rb"); if (pFile == NULL) { @@ -372,46 +406,92 @@ bool jconf::parse_config(const char* sFilename) buffer[flen] = '}'; buffer[flen + 1] = '\0'; - prv->jsonDoc.Parse(buffer, flen+2); + Document& root = main_conf ? prv->jsonDoc : prv->jsonDocPools; + + root.Parse(buffer, flen+2); free(buffer); - if(prv->jsonDoc.HasParseError()) + if(root.HasParseError()) { - printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", - int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + printer::inst()->print_msg(L0, "JSON config parse error in '%s' (offset %llu): %s", + sFilename, int_port(root.GetErrorOffset()), GetParseError_En(root.GetParseError())); return false; } - - if(!prv->jsonDoc.IsObject()) + if(!root.IsObject()) { //This should never happen as we created the root ourselves - printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + printer::inst()->print_msg(L0, "Invalid config file '%s'. 
No root?", sFilename); return false; } - for(size_t i = 0; i < iConfigCnt; i++) + if(main_conf) { - if(oConfigValues[i].iName != i) + for(size_t i = 2; i < iConfigCnt; i++) { - printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); - return false; - } + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } - prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + prv->configValues[i] = GetObjectMember(root, oConfigValues[i].sName); - if(prv->configValues[i] == nullptr) - { - printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); - return false; - } + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file '%s'. Missing value \"%s\".", sFilename, oConfigValues[i].sName); + return false; + } - if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file '%s'. Value \"%s\" has unexpected type.", sFilename, oConfigValues[i].sName); + return false; + } + } + } + else + { + for(size_t i = 0; i < 2; i++) { - printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); - return false; + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } + + prv->configValues[i] = GetObjectMember(root, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file '%s'. Missing value \"%s\".", sFilename, oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file '%s'. 
Value \"%s\" has unexpected type.", sFilename, oConfigValues[i].sName); + return false; + } } } + return true; +} + +bool jconf::parse_config(const char* sFilename, const char* sFilenamePools) +{ + if(!check_cpu_features()) + { + printer::inst()->print_msg(L0, "CPU support of SSE2 is required."); + return false; + } + + if(!parse_file(sFilename, true)) + return false; + + if(!parse_file(sFilenamePools, false)) + return false; + size_t pool_cnt = prv->configValues[aPoolList]->Size(); if(pool_cnt == 0) { @@ -529,5 +609,38 @@ bool jconf::parse_config(const char* sFilename) } } + std::string ctmp = GetMiningCoin(); + std::transform(ctmp.begin(), ctmp.end(), ctmp.begin(), ::tolower); + + if(ctmp.length() == 0) + { + printer::inst()->print_msg(L0, "You need to specify the coin that you want to mine."); + return false; + } + + for(size_t i=0; i < coin_alogo_size; i++) + { + if(ctmp == "monero") + { + printer::inst()->print_msg(L0, "You entered Monero as coin name. Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n" + "Please use monero2 if you want to mine Monero, or name the coin that you want to mine."); + return false; + } + + if(ctmp == coin_algos[i].coin_name) + { + mining_algo = coin_algos[i].algo; + break; + } + } + + if(mining_algo == invalid_algo) + { + std::string cl; + GetAlgoList(cl); + printer::inst()->print_msg(L0, "Unrecognised coin '%s', your options are:\n%s", ctmp.c_str(), cl.c_str()); + return false; + } + return true; } diff --git a/xmrstak/jconf.hpp b/xmrstak/jconf.hpp index 9a4e958..6874d37 100644 --- a/xmrstak/jconf.hpp +++ b/xmrstak/jconf.hpp @@ -1,12 +1,12 @@ #pragma once +#include "xmrstak/backend/cryptonight.hpp" #include "xmrstak/misc/environment.hpp" #include "params.hpp" #include #include - class jconf { public: @@ -18,7 +18,7 @@ public: return env.pJconfConfig; }; - bool parse_config(const char* sFilename = xmrstak::params::inst().configFile.c_str()); + bool parse_config(const char* 
sFilename, const char* sFilenamePools); struct pool_cfg { const char* sPoolAddr; @@ -48,8 +48,13 @@ public: bool TlsSecureAlgos(); - const std::string GetCurrency(); - bool IsCurrencyMonero(); + inline xmrstak_algo GetMiningAlgo() { return mining_algo; } + + std::string GetMiningCoin(); + + static void GetAlgoList(std::string& list); + static bool IsOnAlgoList(std::string& needle); + static const char* GetDefaultPool(const char* needle); uint64_t GetVerboseLevel(); bool PrintMotd(); @@ -78,9 +83,12 @@ public: private: jconf(); + bool parse_file(const char* sFilename, bool main_conf); + bool check_cpu_features(); struct opaque_private; opaque_private* prv; bool bHaveAes; + xmrstak_algo mining_algo; }; diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index c4ba26e..a620173 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -329,11 +329,14 @@ void executor::on_sock_ready(size_t pool_id) if(!pool->cmd_login()) { - if(!pool->have_sock_error()) + if(pool->have_call_error() && !pool->is_dev_pool()) { - log_socket_error(pool, pool->get_call_error()); - pool->disconnect(); + std::string str = "Login error: " + pool->get_call_error(); + log_socket_error(pool, std::move(str)); } + + if(!pool->have_sock_error()) + pool->disconnect(); } } @@ -406,14 +409,19 @@ void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob) void executor::on_miner_result(size_t pool_id, job_result& oResult) { jpsock* pool = pick_pool_by_id(pool_id); - bool is_monero = jconf::inst()->IsCurrencyMonero(); + + const char* backend_name = xmrstak::iBackend::getName(pvThreads->at(oResult.iThreadId)->backendType); + uint64_t backend_hashcount, total_hashcount = 0; + + backend_hashcount = pvThreads->at(oResult.iThreadId)->iHashCount.load(std::memory_order_relaxed); + for(size_t i = 0; i < pvThreads->size(); i++) + total_hashcount += pvThreads->at(i)->iHashCount.load(std::memory_order_relaxed); if(pool->is_dev_pool()) { //Ignore errors silently 
if(pool->is_running() && pool->is_logged_in()) - pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, pvThreads->at(oResult.iThreadId), is_monero); - + pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, backend_name, backend_hashcount, total_hashcount, jconf::inst()->GetMiningAlgo()); return; } @@ -424,7 +432,7 @@ void executor::on_miner_result(size_t pool_id, job_result& oResult) } size_t t_start = get_timestamp_ms(); - bool bResult = pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, pvThreads->at(oResult.iThreadId), is_monero); + bool bResult = pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, backend_name, backend_hashcount, total_hashcount, jconf::inst()->GetMiningAlgo()); size_t t_len = get_timestamp_ms() - t_start; if(t_len > 0xFFFF) @@ -540,19 +548,38 @@ void executor::ex_main() pools.emplace_back(i+1, params.poolURL.c_str(), params.poolUsername.c_str(), params.poolRigid.c_str(), params.poolPasswd.c_str(), 9.9, false, params.poolUseTls, "", params.nicehashMode); } - if(jconf::inst()->IsCurrencyMonero()) + switch(jconf::inst()->GetMiningAlgo()) { + case cryptonight_heavy: if(dev_tls) - pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false); + pools.emplace_front(0, "donate.xmr-stak.net:8888", "", "", "", 0.0, true, true, "", true); else - pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false); - } - else - { + pools.emplace_front(0, "donate.xmr-stak.net:5555", "", "", "", 0.0, true, false, "", true); + break; + + case cryptonight_monero: + if(dev_tls) + pools.emplace_front(0, "donate.xmr-stak.net:8800", "", "", "", 0.0, true, true, "", false); + else + pools.emplace_front(0, "donate.xmr-stak.net:5500", "", "", "", 0.0, true, false, "", false); + break; + + case cryptonight_lite: if(dev_tls) pools.emplace_front(0, "donate.xmr-stak.net:7777", "", "", "", 0.0, true, true, "", true); else pools.emplace_front(0, 
"donate.xmr-stak.net:4444", "", "", "", 0.0, true, false, "", true); + break; + + case cryptonight: + if(dev_tls) + pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false); + else + pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false); + break; + + default: + break; } ex_event ev; diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp index 9c413dc..95bcc9c 100644 --- a/xmrstak/net/jpsock.cpp +++ b/xmrstak/net/jpsock.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include "jpsock.hpp" #include "socks.hpp" @@ -133,6 +134,7 @@ jpsock::~jpsock() std::string&& jpsock::get_call_error() { + call_error = false; return std::move(prv->oCallRsp.sCallErr); } @@ -189,11 +191,25 @@ bool jpsock::set_socket_error_strerr(const char* a, int res) void jpsock::jpsock_thread() { jpsock_thd_main(); + + if(!bHaveSocketError) + set_socket_error("Socket closed."); + executor::inst()->push_event(ex_event(std::move(sSocketError), quiet_close, pool_id)); - // If a call is wating, send an error to end it - bool bCallWaiting = false; std::unique_lock mlock(call_mutex); + bool bWait = prv->oCallRsp.pCallData != nullptr; + + // If a call is waiting, wait a little bit before blowing it out of the water + if(bWait) + { + mlock.unlock(); + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + mlock.lock(); + } + + // If the call is still there send an error to end it + bool bCallWaiting = false; if(prv->oCallRsp.pCallData != nullptr) { prv->oCallRsp.bHaveResponse = true; @@ -348,6 +364,7 @@ bool jpsock::process_line(char* line, size_t len) { prv->oCallRsp.pCallData = nullptr; prv->oCallRsp.sCallErr.assign(sError, iErrorLn); + call_error = true; } else prv->oCallRsp.pCallData->CopyFrom(*mt, prv->callAllocator); @@ -440,6 +457,7 @@ bool jpsock::connect(std::string& sConnectError) { ext_algo = ext_backend = ext_hashcount = ext_motd = false; bHaveSocketError = false; + call_error = false; 
sSocketError.clear(); iJobDiff = 0; connect_attempts++; @@ -596,7 +614,7 @@ bool jpsock::cmd_login() return true; } -bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, xmrstak::iBackend* bend, bool algo_full_cn) +bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo) { char cmd_buffer[1024]; char sNonce[9]; @@ -604,16 +622,35 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes /*Extensions*/ char sAlgo[64] = {0}; char sBackend[64] = {0}; - char sHashcount[64] = {0}; + char sHashcount[128] = {0}; if(ext_backend) - snprintf(sBackend, sizeof(sBackend), ",\"backend\":\"%s\"", xmrstak::iBackend::getName(bend->backendType)); + snprintf(sBackend, sizeof(sBackend), ",\"backend\":\"%s\"", backend_name); if(ext_hashcount) - snprintf(sHashcount, sizeof(sHashcount), ",\"hashcount\":%llu", int_port(bend->iHashCount.load(std::memory_order_relaxed))); + snprintf(sHashcount, sizeof(sHashcount), ",\"hashcount\":%llu,\"hashcount_total\":%llu", int_port(backend_hashcount), int_port(total_hashcount)); if(ext_algo) - snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", algo_full_cn ? 
"cryptonight" : "cryptonight-lite"); + { + const char* algo_name; + switch(algo) + { + case cryptonight: + algo_name = "cryptonight"; + break; + case cryptonight_lite: + algo_name = "cryptonight-lite"; + break; + case cryptonight_monero: + algo_name = "cryptonight-monero"; + break; + default: + algo_name = "unknown"; + break; + } + + snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", algo_name); + } bin2hex((unsigned char*)&iNonce, 4, sNonce); sNonce[8] = '\0'; diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp index d9e5542..2ddeeee 100644 --- a/xmrstak/net/jpsock.hpp +++ b/xmrstak/net/jpsock.hpp @@ -2,6 +2,7 @@ #include "xmrstak/backend/iBackend.hpp" #include "msgstruct.hpp" +#include "xmrstak/jconf.hpp" #include #include @@ -34,7 +35,7 @@ public: void disconnect(bool quiet = false); bool cmd_login(); - bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, xmrstak::iBackend* bend, bool algo_full_cn); + bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo); static bool hex2bin(const char* in, unsigned int len, unsigned char* out); static void bin2hex(const unsigned char* in, unsigned int len, char* out); @@ -62,6 +63,7 @@ public: bool get_pool_motd(std::string& strin); std::string&& get_call_error(); + bool have_call_error() { return call_error; } bool have_sock_error() { return bHaveSocketError; } inline static uint64_t t32_to_t64(uint32_t t) { return 0xFFFFFFFFFFFFFFFFULL / (0xFFFFFFFFULL / ((uint64_t)t)); } @@ -106,6 +108,7 @@ private: std::atomic bRunning; std::atomic bLoggedIn; std::atomic quiet_close; + std::atomic call_error; uint8_t* bJsonRecvMem; uint8_t* bJsonParseMem; diff --git a/xmrstak/net/socket.cpp b/xmrstak/net/socket.cpp index 89e9902..7c58a8e 100644 --- a/xmrstak/net/socket.cpp +++ b/xmrstak/net/socket.cpp @@ -48,6 +48,7 @@ bool plain_socket::set_hostname(const char* sAddr) char sAddrMb[256]; 
char *sTmp, *sPort; + sock_closed = false; size_t ln = strlen(sAddr); if (ln >= sizeof(sAddrMb)) return pCallback->set_socket_error("CONNECT error: Pool address overflow."); @@ -117,11 +118,16 @@ bool plain_socket::set_hostname(const char* sAddr) return pCallback->set_socket_error_strerr("CONNECT error: Socket creation failed "); } + int flag = 1; + /* If it fails, it fails, we won't loose too much sleep over it */ + setsockopt(hSocket, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)); + return true; } bool plain_socket::connect() { + sock_closed = false; int ret = ::connect(hSocket, pSockAddr->ai_addr, (int)pSockAddr->ai_addrlen); freeaddrinfo(pAddrRoot); @@ -135,6 +141,9 @@ bool plain_socket::connect() int plain_socket::recv(char* buf, unsigned int len) { + if(sock_closed) + return 0; + int ret = ::recv(hSocket, buf, len, 0); if(ret == 0) @@ -167,6 +176,7 @@ void plain_socket::close(bool free) { if(hSocket != INVALID_SOCKET) { + sock_closed = true; sock_close(hSocket); hSocket = INVALID_SOCKET; } @@ -211,12 +221,13 @@ void tls_socket::init_ctx() if(jconf::inst()->TlsSecureAlgos()) { - SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_COMPRESSION); + SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1); } } bool tls_socket::set_hostname(const char* sAddr) { + sock_closed = false; if(ctx == nullptr) { init_ctx(); @@ -233,6 +244,10 @@ bool tls_socket::set_hostname(const char* sAddr) return false; } + int flag = 1; + /* If it fails, it fails, we won't loose too much sleep over it */ + setsockopt(BIO_get_fd(bio, nullptr), IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)); + if(BIO_set_conn_hostname(bio, sAddr) != 1) { print_error(); @@ -248,7 +263,7 @@ bool tls_socket::set_hostname(const char* sAddr) if(jconf::inst()->TlsSecureAlgos()) { - if(SSL_set_cipher_list(ssl, "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4:!SHA1") != 1) + if(SSL_set_cipher_list(ssl, "HIGH:!aNULL:!PSK:!SRP:!MD5:!RC4:!SHA1") != 1) 
{ print_error(); return false; @@ -260,6 +275,7 @@ bool tls_socket::set_hostname(const char* sAddr) bool tls_socket::connect() { + sock_closed = false; if(BIO_do_connect(bio) != 1) { print_error(); @@ -340,6 +356,9 @@ bool tls_socket::connect() int tls_socket::recv(char* buf, unsigned int len) { + if(sock_closed) + return 0; + int ret = BIO_read(bio, buf, len); if(ret == 0) @@ -360,6 +379,7 @@ void tls_socket::close(bool free) if(bio == nullptr || ssl == nullptr) return; + sock_closed = true; if(!free) { sock_close(BIO_get_fd(bio, nullptr)); diff --git a/xmrstak/net/socket.hpp b/xmrstak/net/socket.hpp index 192a32c..b09142d 100644 --- a/xmrstak/net/socket.hpp +++ b/xmrstak/net/socket.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include "socks.hpp" class jpsock; @@ -12,6 +13,9 @@ public: virtual int recv(char* buf, unsigned int len) = 0; virtual bool send(const char* buf) = 0; virtual void close(bool free) = 0; + +protected: + std::atomic sock_closed; }; class plain_socket : public base_socket diff --git a/xmrstak/net/socks.hpp b/xmrstak/net/socks.hpp index 1d25d3a..86749e5 100644 --- a/xmrstak/net/socks.hpp +++ b/xmrstak/net/socks.hpp @@ -62,9 +62,8 @@ inline const char* sock_gai_strerror(int err, char* buf, size_t len) #include /* Needed for close() */ #include #include -#if defined(__FreeBSD__) #include /* Needed for IPPROTO_TCP */ -#endif +#include inline void sock_init() {} typedef int SOCKET; diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp index bed3427..6928df5 100644 --- a/xmrstak/params.hpp +++ b/xmrstak/params.hpp @@ -40,6 +40,7 @@ struct params std::string currency; std::string configFile; + std::string configFilePools; std::string configFileAMD; std::string configFileNVIDIA; std::string configFileCPU; @@ -48,6 +49,9 @@ struct params std::string minerArg0; std::string minerArgs; + // block_version >= 0 enable benchmark + int benchmark_block_version = -1; + params() : binaryName("xmr-stak"), executablePrefix(""), @@ -55,6 +59,7 @@ struct params 
useNVIDIA(true), useCPU(true), configFile("config.txt"), + configFilePools("pools.txt"), configFileAMD("amd.txt"), configFileCPU("cpu.txt"), configFileNVIDIA("nvidia.txt") diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl new file mode 100644 index 0000000..0b7084f --- /dev/null +++ b/xmrstak/pools.tpl @@ -0,0 +1,39 @@ +R"===( +/* + * pool_address - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported. + * wallet_address - Your wallet, or pool login. + * rig_id - Rig identifier for pool-side statistics (needs pool support). + * pool_password - Can be empty in most cases or "x". + * use_nicehash - Limit the nonce to 3 bytes as required by nicehash. + * use_tls - This option will make us connect using Transport Layer Security. + * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it. + * pool_weight - Pool weight is a number telling the miner how important the pool is. Miner will mine mostly at the pool + * with the highest weight, unless the pool fails. Weight must be an integer larger than 0. + * + * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com + */ + +"pool_list" : +[ +POOLCONF], + +/* + * Currency to mine. 
Supported values: + * + * aeon + * cryptonight (try this if your coin is not listed) + * cryptonight_lite + * edollar + * electroneum + * graft + * intense + * karbo + * monero2 (use this for Monero's new PoW) + * sumokoin + * + */ + +"currency" : "CURRENCY", + +)===" + diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index 770834e..1444b30 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -32,13 +32,7 @@ #define OS_TYPE "unk" #endif -#if defined(CONF_NO_AEON) -#define COIN_TYPE "monero" -#elif defined(CONF_NO_MONERO) -#define COIN_TYPE "aeon" -#else -#define COIN_TYPE "aeon-monero" -#endif +#define COIN_TYPE "aeon-cryptonight-monero" #define XMRSTAK_PP_TOSTRING1(str) #str #define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str) -- cgit v1.1 From 5d2cae77f9a08e01b39429407090580a3dd42935 Mon Sep 17 00:00:00 2001 From: Juan Leni Date: Sun, 25 Mar 2018 11:53:13 +0200 Subject: Fixing allocation issue --- .gitignore | 5 +++++ xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5b0be96..4a4c233 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,8 @@ config-debug.txt # KDevelop files .kdev4/ xmr-stak.kdev4 + +# Idea/Clion project files +cmake-build-release/ +cmake-build-debug/ +\.idea/ diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 2f08a1a..8be1460 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -299,7 +299,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, ctx_b_size)); - CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); + CUDA_CHECK(ctx->device_id, 
cudaMalloc(&ctx->d_input, 28 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); CUDA_CHECK_MSG( -- cgit v1.1 From f46d29544fa3a42d74ba31eb66c69b66f13207eb Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 15:07:10 +0200 Subject: fix input size on device --- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 8be1460..0925fd5 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -127,7 +127,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric uint32_t ctx_b[4]; uint32_t ctx_key1[40]; uint32_t ctx_key2[40]; - uint32_t input[21]; + uint32_t input[28]; memcpy( input, d_input, len ); //*((uint32_t *)(((char *)input) + 39)) = startNonce + thread; -- cgit v1.1 From fe5a01a316e5a0e35176dc4d01cd6c41de6b0aaf Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 20:02:07 +0200 Subject: increase version to 2.3.0 --- xmrstak/version.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index 1444b30..c7dbf9d 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.2.0" +#define XMR_STAK_VERSION "2.3.0" #if defined(_WIN32) #define OS_TYPE "win" -- cgit v1.1 From 0f9392f1171b33981b98b493b401a524ad68a756 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 20:09:39 +0200 Subject: OpenCL precompiled code message change warning when precompiled OpenCL code is not found --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 7547083..8d0fd32 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -395,7 +395,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ std::ifstream clBinFile(cache_file, std::ofstream::in | std::ofstream::binary); if(!clBinFile.good()) { - printer::inst()->print_msg(L1,"WARNING: OpenCL device %u - OpenCL binary %s not found.",ctx->deviceIdx, cache_file.c_str()); + printer::inst()->print_msg(L1,"OpenCL device %u - Precompiled code %s not found. Compiling ...",ctx->deviceIdx, cache_file.c_str()); ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); if(ret != CL_SUCCESS) { @@ -489,11 +489,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ file_stream.open(cache_file, std::ofstream::out | std::ofstream::binary); file_stream.write(all_programs[dev_id], binary_sizes[dev_id]); file_stream.close(); - printer::inst()->print_msg(L1, "OpenCL device %u - OpenCL binary file stored in file %s.",ctx->deviceIdx, cache_file.c_str()); + printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code stored in file %s",ctx->deviceIdx, cache_file.c_str()); } else { - printer::inst()->print_msg(L1, "OpenCL device %u - Load OpenCL binary file %s",ctx->deviceIdx, cache_file.c_str()); + printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled cod from file %s",ctx->deviceIdx, cache_file.c_str()); std::ostringstream ss; ss << clBinFile.rdbuf(); std::string s = ss.str(); -- cgit v1.1 From d82630be5881a494e9b11343e4383700d97638e0 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 21:06:21 +0200 Subject: fixAMD benchmark mode The OpenCl code is only supporting jobs up to 84byte --- xmrstak/cli/cli-miner.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp 
index e2d50f2..4f725bd 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -783,7 +783,10 @@ int do_benchmark(int block_version) printer::inst()->print_msg(L0, "Wait 30 sec until all backends are initialized"); std::this_thread::sleep_for(std::chrono::seconds(30)); - xmrstak::miner_work benchWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0); + /* AMD and NVIDIA is currently only supporting work sizes up to 84byte + * \todo fix this issue + */ + xmrstak::miner_work benchWork = xmrstak::miner_work("", work, 84, 0, false, 0); printer::inst()->print_msg(L0, "Start a 60 second benchmark..."); xmrstak::globalStates::inst().switch_work(benchWork, dat); uint64_t iStartStamp = get_timestamp_ms(); -- cgit v1.1 From 5498b16ba54671f9cd7d8d6c14b8a3610e73637d Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 21:16:09 +0200 Subject: move option currency down move option `--currency` down --- xmrstak/cli/cli-miner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index e2d50f2..9a1becf 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -72,7 +72,6 @@ void help() #ifdef _WIN32 cout<<" --noUAC disable the UAC dialog"< Date: Sun, 25 Mar 2018 21:42:22 +0200 Subject: revert input size change revert #1198, the block size is limited to 84byte --- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 0925fd5..c2a1f87 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -127,7 +127,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric uint32_t ctx_b[4]; uint32_t ctx_key1[40]; uint32_t ctx_key2[40]; - uint32_t input[28]; + uint32_t input[21]; memcpy( input, d_input, len ); //*((uint32_t 
*)(((char *)input) + 39)) = startNonce + thread; @@ -299,7 +299,8 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, ctx_b_size)); - CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 28 * sizeof (uint32_t ) )); + // POW block format http://monero.wikia.com/wiki/PoW_Block_Header_Format + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); CUDA_CHECK_MSG( -- cgit v1.1 From 487ab2f271b56ed963b03e1783a74de34625b070 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 22:03:20 +0200 Subject: rename `monero2` to `monero7` fix #1202 --- doc/FAQ.md | 11 ++++++----- xmrstak/jconf.cpp | 6 +++--- xmrstak/pools.tpl | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/FAQ.md b/doc/FAQ.md index f22bb53..8739fc4 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -9,7 +9,7 @@ * [Virus Protection Alert](#virus-protection-alert) * [Change Currency to Mine](#change-currency-to-mine) * [How can I mine Monero](#how-can-i-mine-monero) -* [Why is Monero named monero2](why-is-monero-named-monero2) +* [Why is Monero named monero7](why-is-monero-named-monero7) * [Which currency must be chosen if my fork coin is not listed](#which-currency-must-be-chosen-if-my-fork-coin-is-not-listed) ## "Obtaining SeLockMemoryPrivilege failed." 
@@ -72,15 +72,16 @@ If your antivirus software flags **xmr-stak**, it will likely move it to its qua If the miner is compiled for Monero and Aeon than you can change - the value `currency` in the config *or* - - start the miner with the [command line option](usage.md) `--currency monero` or `--currency aeon` + - start the miner with the [command line option](usage.md) `--currency monero7` or `--currency aeon` + - run `xmr-stak --help` to see all supported currencies and algorithms ## How can I mine Monero -Set the value `currency` in `pools.txt` to `monero2`. +Set the value `currency` in `pools.txt` to `monero7`. -## Why is Monero named monero2 +## Why is Monero named monero7 -To avoid configuration conflicts after the hard fork of Monero to the new POW with our old naming schema where all cryptonight currencies was selected by choosing `monero` as currency we decided to switch to the name `monero2`. +To avoid configuration conflicts after the hard fork of Monero to the new POW with our old naming schema where all cryptonight currencies was selected by choosing `monero` as currency we decided to switch to the name `monero7`. ## Which currency must be chosen if my fork coin is not listed diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index 225fbe0..713beb4 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -102,7 +102,7 @@ xmrstak_coin_algo coin_algos[] = { { "graft", cryptonight, nullptr }, { "intense", cryptonight, nullptr }, { "karbo", cryptonight, nullptr }, - { "monero2", cryptonight_monero, "pool.usxmrpool.com:3333" }, + { "monero7", cryptonight_monero, "pool.usxmrpool.com:3333" }, { "sumokoin", cryptonight_heavy, nullptr } }; @@ -322,7 +322,7 @@ bool jconf::IsOnAlgoList(std::string& needle) if(needle == "monero") { printer::inst()->print_msg(L0, "You entered Monero as coin name. 
Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n" - "Please use monero2 if you want to mine Monero, or name the coin that you want to mine."); + "Please use 'monero7' (support auto switch to new POW) if you want to mine Monero, \nor name the coin that you want to mine."); return false; } @@ -623,7 +623,7 @@ bool jconf::parse_config(const char* sFilename, const char* sFilenamePools) if(ctmp == "monero") { printer::inst()->print_msg(L0, "You entered Monero as coin name. Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n" - "Please use monero2 if you want to mine Monero, or name the coin that you want to mine."); + "Please use monero7 (support auto switch to new POW) if you want to mine Monero, or name the coin that you want to mine."); return false; } diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index 0b7084f..d3a8a5a 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -28,7 +28,7 @@ POOLCONF], * graft * intense * karbo - * monero2 (use this for Monero's new PoW) + * monero7 (use this for Monero's new PoW) * sumokoin * */ -- cgit v1.1 From 2e6fdeba0ebe747754a866058b0583cd7d8ddbbe Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Sun, 25 Mar 2018 21:10:51 +0100 Subject: Change video guide to a placeholder link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d1ec255..f2f46ae 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ XMR-Stak is a universal Stratum pool miner. 
This miner supports CPUs, AMD and NV ## Video setup guide on Windows -[](https://www.youtube.com/watch?v=m9XFoQvLH8Y) +[](https://www.youtube.com/watch?v=-8paGFwxyMU) ###### Video by Crypto Sewer ## Overview -- cgit v1.1 From 32857572dfe55e1beecbfd76f376691f764bd737 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Sun, 25 Mar 2018 23:03:33 +0200 Subject: remove old release signatures the signatures for the next release will be in the release note itself --- README.md | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/README.md b/README.md index f2f46ae..29e1e12 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,6 @@ XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NV * [HowTo Compile](doc/compile.md) * [FAQ](doc/FAQ.md) * [Developer Donation](#default-developer-donation) -* [Release Cheksums](#release-checksums) * [Developer PGP Key's](doc/pgp_keys.md) ## Features @@ -75,45 +74,3 @@ psychocrypt: ``` 45tcqnJMgd3VqeTznNotiNj4G9PQoK67TGRiHyj6EYSZ31NUbAfs9XdiU5squmZb717iHJLxZv3KfEw8jCYGL5wa19yrVCn ``` - -## Release Checksums - -Please use the [Developer PGP Key's](doc/pgp_keys.md) to verify the integrity of the precompiled binaries. 
- -``` ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA256 - -XMR-Stak 2.2.0 Windows Build Checksums - -compiled by: psychocrypt - -$ sha1sum * -3f1634244ccd336f7df581e3c82e1c6ca38ce714 libeay32.dll -538f3bd9dfcafc379e912562bcf343333f5375c7 ssleay32.dll -302e5be7c97fcd4922bf99b3533c0523ead5d109 xmrstak_cuda_backend.dll -ad6b9e62a7ea132e1bec0efd8d9e5f8a2ae531ca xmr-stak.exe -393bc5deb7e59e61cc7f4ccc0f4438402422f3b0 xmrstak_opencl_backend.dll - -$ sha3sum * -5aeefca7278be1b2706d99bf89fa23646931f881aff8bbca33654eb1 libeay32.dll -6b696caa620b0c6372881b11e503313152b5191c2d5497b26f81ab79 ssleay32.dll -038de57a707664c7c3ab3a74c8bdb3ed4e22000a74d8b7c359c7c4b5 xmrstak_cuda_backend.dll -19ab61049051178a362dc0d1c17af06f5ca1eb0a75182c0388e5aa22 xmr-stak.exe -cc7ba0fbde50d72df2a530ce52a831578cfa19999841eb954554a022 xmrstak_opencl_backend.dll - -date -Fri Dec 22 22:09:59 CET 2017 - ------BEGIN PGP SIGNATURE----- -Version: GnuPG v2 - -iQEcBAEBCAAGBQJaPXYSAAoJEAUWOMCIZelDQpAH/As2BD6qDZvbKH5NPHjjDv6T -KBJ6/0h+x2k4Iy3GelrtaogB4LvUDzci4MRfaTXr23Xr+rhwsx3J2xvVdWKZgPXh -bQm5pTJFhiao6Dh+Orway6TLmuaEBLNtknatSkjPUPKmkVd/A7kxxkdelDB//yb+ -7k5HGb84T+HU8HBlB00pDITyXv/414egpZGMqWeBXsYDeEYa8KHZlEIO3YI4JrEz -pNW44Q1YcWZ+zxqTDrvMgjW8KJZcXg6ijJ3fEhGBo+hcnF+WuUB3Yd3Frf0ps5J5 -MjnWXl/uOobML6K70g2UQcHcEDbPk8f9LUxX1++/I0aHsRMGMYhRj0ad5KYE1IY= -=VCEv ------END PGP SIGNATURE----- -``` -- cgit v1.1
Difficulty%u
Good results%u / %u (%.1f %%)