diff options
Diffstat (limited to 'xmrstak/backend/amd/minethd.cpp')
-rw-r--r-- | xmrstak/backend/amd/minethd.cpp | 237 |
1 files changed, 237 insertions, 0 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp new file mode 100644 index 0000000..f2f5ff4 --- /dev/null +++ b/xmrstak/backend/amd/minethd.cpp @@ -0,0 +1,237 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <thread> + +#include "../../ConfigEditor.hpp" +#include "autoAdjust.hpp" + +#include <vector> +#include "../../console.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../cpu/minethd.h" +#include "../../jconf.h" + +#include "../../executor.h" +#include "minethd.h" +#include "../../jconf.h" +#include "../../crypto/cryptonight.h" +#include "../../Environment.hpp" +#include "../../Params.hpp" +#include "amd_gpu/gpu.h" + + +namespace xmrstak +{ +namespace amd +{ + +minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + iHashCount = 0; + iTimestamp = 0; + pGpuCtx = ctx; + + oWorkThd = std::thread(&minethd::work_main, this); +} + +extern "C" { +#ifdef WIN32 +__declspec(dllexport) +#endif +std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, Environment& env) +{ + Environment::inst() = env; + return amd::minethd::thread_starter(threadOffset, pWork); +} +} // extern "C" + +bool minethd::init_gpus() +{ + size_t i, n = jconf::inst()->GetThreadCount(); + + printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while..."); + vGpuData.resize(n); + + jconf::thd_cfg cfg; + for(i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + vGpuData[i].deviceIdx = cfg.index; + vGpuData[i].rawIntensity = cfg.intensity; + vGpuData[i].workSize = cfg.w_size; + } + + return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; +} + +std::vector<GpuContext> minethd::vGpuData; + +std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + + if(!ConfigEditor::file_exist(Params::inst().configFileAMD)) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } + + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + // \ todo get device count and exit if no opencl device + + if(!init_gpus()) + { + printer::inst()->print_msg(L1, "WARNING: AMD device not found"); + return pvThreads; + } + + size_t i, n = jconf::inst()->GetThreadCount(); + pvThreads->reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]); + + if(cfg.cpu_aff >= 0) + { +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff); + } + + pvThreads->push_back(thd); + if(cfg.cpu_aff >= 0) + printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff); + else + printer::inst()->print_msg(L1, "Starting GPU thread, no affinity."); + } + + return pvThreads; +} + +void minethd::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (GlobalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::inst().iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + GlobalStates::inst().oGlobalWork = pWork; + GlobalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst); + GlobalStates::inst().iGlobalJobNo++; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::inst().oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::inst().iConsumeCnt++; + +} + +void minethd::work_main() +{ + uint64_t iCount = 0; + + cryptonight_ctx* cpu_ctx; + cpu_ctx = cpu::minethd::minethd_alloc_ctx(); + cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + + GlobalStates::inst().iConsumeCnt++; + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt); + uint32_t target = oWork.iTarget32; + XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target); + + while(GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + cl_uint results[0x100]; + memset(results,0,sizeof(cl_uint)*(0x100)); + + XMRRunJob(pGpuCtx, results); + + for(size_t i = 0; i < results[0xFF]; i++) + { + uint8_t bWorkBlob[112]; + uint8_t bResult[32]; + + memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memset(bResult, 0, sizeof(job_result::bResult)); + + *(uint32_t*)(bWorkBlob + 39) = results[i]; + + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx); + if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId)); + else + executor::inst()->log_result_error("AMD Invalid Result"); + } + + iCount += pGpuCtx->rawIntensity; + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + std::this_thread::yield(); + } + + consume_work(); + } +} + +} // namespace amd +} // namespace xmrstak |