diff options
author | psychocrypt <psychocrypt@users.noreply.github.com> | 2017-09-29 20:32:31 +0200 |
---|---|---|
committer | psychocrypt <psychocrypt@users.noreply.github.com> | 2017-09-30 23:46:08 +0200 |
commit | cc429b68fadc502b981fd0acd64a5ff6e2ae1d15 (patch) | |
tree | 3fb23fc4db15dbdd08af4c7ea20134b9d82e58fd /xmrstak/backend/nvidia/minethd.cpp | |
parent | e5b0319d5a9f58762fa934ad700113908940cb31 (diff) | |
download | xmr-stak-cc429b68fadc502b981fd0acd64a5ff6e2ae1d15.zip xmr-stak-cc429b68fadc502b981fd0acd64a5ff6e2ae1d15.tar.gz |
group files
- move source code to `src`
- categorize files and move to group folder
- change upper case class files to lower case
- change C++ header to `*.hpp`
Diffstat (limited to 'xmrstak/backend/nvidia/minethd.cpp')
-rw-r--r-- | xmrstak/backend/nvidia/minethd.cpp | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp new file mode 100644 index 0000000..cbee219 --- /dev/null +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -0,0 +1,273 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <thread> +#include <bitset> +#include <vector> +#include "../../console.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../cpu/minethd.h" +#include "../../Params.hpp" + +#include "../../executor.h" +#include "minethd.h" +#include "../../jconf.h" +#include "../../crypto/cryptonight.h" +#include "../../Environment.hpp" +#include "autoAdjust.hpp" + + +#ifndef USE_PRECOMPILED_HEADERS +#ifdef WIN32 +#include <direct.h> +#include <windows.h> +#else +#include <sys/types.h> +#include <dlfcn.h> +#endif +#include <iostream> +#endif + +namespace xmrstak +{ +namespace nvidia +{ + +#ifdef WIN32 + HINSTANCE lib_handle; +#else + void *lib_handle; +#endif + +minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + + ctx.device_id = (int)cfg.id; + ctx.device_blocks = (int)cfg.blocks; + ctx.device_threads = (int)cfg.threads; + ctx.device_bfactor = (int)cfg.bfactor; + ctx.device_bsleep = (int)cfg.bsleep; + + oWorkThd = std::thread(&minethd::work_main, this); +} + + +bool minethd::self_test() +{ + cryptonight_ctx* ctx0; + unsigned char out[32]; + bool bResult = true; + + ctx0 = new cryptonight_ctx; + if(::jconf::inst()->HaveHardwareAes()) + { + //cryptonight_hash_ctx("This is a test", 14, out, ctx0); + bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + } + else + { + //cryptonight_hash_ctx_soft("This is a test", 14, out, ctx0); + bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + } + delete ctx0; + + //if(!bResult) + // printer::inst()->print_msg(L0, + // "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations."); + + return bResult; +} + + +extern "C" +{ +#ifdef WIN32 +__declspec(dllexport) +#endif +std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, Environment& env) +{ + Environment::inst() = env; + return nvidia::minethd::thread_starter(threadOffset, pWork); +} +} // extern "C" + +std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + + if(!ConfigEditor::file_exist(Params::inst().configFileNVIDIA)) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } + + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + int deviceCount = 0; + if(cuda_get_devicecount(&deviceCount) != 1) + { + std::cout<<"WARNING: NVIDIA no device found"<<std::endl; + return pvThreads; + } + + size_t i, n = jconf::inst()->GetGPUThreadCount(); + pvThreads->reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetGPUThreadConfig(i, cfg); + minethd* thd = new minethd(pWork, i + threadOffset, cfg); + + if(cfg.cpu_aff >= 0) + { +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff); + } + + pvThreads->push_back(thd); + + if(cfg.cpu_aff >= 0) + printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff); + else + printer::inst()->print_msg(L1, "Starting GPU thread, no affinity."); + } + + return pvThreads; +} + +void minethd::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (GlobalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::inst().iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + GlobalStates::inst().oGlobalWork = pWork; + GlobalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst); + GlobalStates::inst().iGlobalJobNo++; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::inst().oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::inst().iConsumeCnt++; +} + +void minethd::work_main() +{ + uint64_t iCount = 0; + uint32_t iNonce; + cryptonight_ctx* cpu_ctx; + cpu_ctx = cpu::minethd::minethd_alloc_ctx(); + cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + + GlobalStates::inst().iConsumeCnt++; + + if(/*cuda_get_deviceinfo(&ctx) != 1 ||*/ cryptonight_extra_cpu_init(&ctx) != 1) + { + printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo); + std::exit(0); + } + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize); + iNonce = calc_start_nonce(oWork.iResumeCnt); + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + + while(GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + + uint32_t foundNonce[10]; + uint32_t foundCount; + + cryptonight_extra_cpu_prepare(&ctx, iNonce); + cryptonight_core_cpu_hash(&ctx); + cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce); + + for(size_t i = 0; i < foundCount; i++) + { + + uint8_t bWorkBlob[112]; + uint8_t bResult[32]; + + memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memset(bResult, 0, sizeof(job_result::bResult)); + + *(uint32_t*)(bWorkBlob + 39) = foundNonce[i]; + + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx); + if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult), oWork.iPoolId)); + else + executor::inst()->log_result_error("NVIDIA Invalid Result"); + } + + iCount += ctx.device_blocks * ctx.device_threads; + iNonce += ctx.device_blocks * ctx.device_threads; + + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + std::this_thread::yield(); + } + + consume_work(); + } +} + +} // namespace xmrstak + +} //namespace nvidia |