/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Additional permission under GNU GPL version 3 section 7
*
* If you modify this Program, or any covered work, by linking or combining
* it with OpenSSL (or a modified version of that library), containing parts
* covered by the terms of OpenSSL License and SSLeay License, the licensors
* of this Program grant you additional permission to convey the resulting work.
*
*/
#include "minethd.hpp"
#include "autoAdjust.hpp"
#include "amd_gpu/gpu.hpp"
#include "xmrstak/backend/cpu/crypto/cryptonight_altivec.h"
#include "xmrstak/backend/cpu/crypto/cryptonight.h"
#include "xmrstak/misc/configEditor.hpp"
#include "xmrstak/misc/console.hpp"
#include "xmrstak/backend/cpu/minethd.hpp"
#include "xmrstak/jconf.hpp"
#include "xmrstak/misc/executor.hpp"
#include "xmrstak/misc/environment.hpp"
#include "xmrstak/params.hpp"
#include "xmrstak/backend/cpu/hwlocMemory.hpp"
#include <cassert>
#include <chrono>
#include <cstring>
#include <thread>
#include <vector>
namespace xmrstak
{
namespace amd
{
minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg)
{
this->backendType = iBackend::AMD;
oWork = pWork;
bQuit = 0;
iThreadNo = (uint8_t)iNo;
iJobNo = 0;
iHashCount = 0;
iTimestamp = 0;
pGpuCtx = ctx;
this->affinity = cfg.cpu_aff;
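// Startup handshake: hold the affinity mutex and wait on the promise so that
// work_main() can bind its memory to the right NUMA node before we pin the
// thread to a CPU core below.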
std::unique_lock<std::mutex> lck(thd_aff_set);
std::future<void> order_guard = order_fix.get_future();
oWorkThd = std::thread(&minethd::work_main, this);
order_guard.wait();
if(affinity >= 0) //-1 means no affinity
if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
}
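// Exported entry point: when the AMD backend is built as a shared library,
// the main binary resolves this symbol at load time (e.g. via dlopen()/dlsym(),
// or GetProcAddress() on Windows) to start the backend's worker threads.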
extern "C" {
#ifdef WIN32
__declspec(dllexport)
#endif
std::vector<iBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, environment& env)
{
environment::inst(&env);
return amd::minethd::thread_starter(threadOffset, pWork);
}
} // extern "C"
bool minethd::init_gpus()
{
size_t i, n = jconf::inst()->GetThreadCount();
printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
vGpuData.resize(n);
jconf::thd_cfg cfg;
for(i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);
vGpuData[i].deviceIdx = cfg.index;
vGpuData[i].rawIntensity = cfg.intensity;
vGpuData[i].workSize = cfg.w_size;
vGpuData[i].stridedIndex = cfg.stridedIndex;
vGpuData[i].memChunk = cfg.memChunk;
vGpuData[i].compMode = cfg.compMode;
}
return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS;
}
std::vector<GpuContext> minethd::vGpuData;
std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
{
std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>();
if(!configEditor::file_exist(params::inst().configFileAMD))
{
autoAdjust adjust;
if(!adjust.printConfig())
return pvThreads;
}
if(!jconf::inst()->parse_config())
{
win_exit();
}
// \todo get device count and exit if there is no OpenCL device
if(!init_gpus())
{
printer::inst()->print_msg(L1, "WARNING: AMD device not found");
return pvThreads;
}
size_t i, n = jconf::inst()->GetThreadCount();
pvThreads->reserve(n);
jconf::thd_cfg cfg;
for (i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);
if(cfg.cpu_aff >= 0)
{
#if defined(__APPLE__)
printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
#endif
printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
}
else
printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);
minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
pvThreads->push_back(thd);
}
return pvThreads;
}
void minethd::switch_work(miner_work& pWork)
{
// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
// faster than the threads can consume it. This should never happen in real life:
// the pool can't physically send jobs more often than every 250ms or so due to network latency.
while (globalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < globalStates::inst().iThreadCount)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
globalStates::inst().oGlobalWork = pWork;
globalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst);
globalStates::inst().iGlobalJobNo++;
}
void minethd::consume_work()
{
memcpy(&oWork, &globalStates::inst().oGlobalWork, sizeof(miner_work));
iJobNo++;
globalStates::inst().iConsumeCnt++;
}
void minethd::work_main()
{
if(affinity >= 0) //-1 means no affinity
bindMemoryToNUMANode(affinity);
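// NUMA binding is done; unblock the constructor so it can safely pin this thread.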
order_fix.set_value();
std::unique_lock<std::mutex> lck(thd_aff_set);
lck.release();
std::this_thread::yield();
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
cpu_ctx = cpu::minethd::minethd_alloc_ctx();
// start with root algorithm and switch later if fork version is reached
auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot();
cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
globalStates::inst().iConsumeCnt++;
uint8_t version = 0;
while (bQuit == 0)
{
if (oWork.bStall)
{
/* We are stalled here because the executor didn't find a job for us yet,
* either because of network latency or a socket problem. Since we are the
* raison d'être of this software, it is sensible to just wait until we have
* something to work on.
*/
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
consume_work();
continue;
}
uint8_t new_version = oWork.getVersion();
if(new_version != version)
{
if(new_version >= ::jconf::inst()->GetMiningForkHeight())
{
miner_algo = ::jconf::inst()->GetMiningAlgo();
hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
}
version = new_version;
}
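// One GPU invocation evaluates rawIntensity nonces, so that is the hash count per round.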
uint32_t h_per_round = pGpuCtx->rawIntensity;
size_t round_ctr = 0;
assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
uint64_t target = oWork.iTarget;
XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo, version);
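// In NiceHash mode the pool fixes the high byte of the nonce, so seed the counter
// from the nonce field at offset 39 of the work blob instead of starting at zero.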
if(oWork.bNiceHash)
pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
// Allocate a new nonce range every 16 rounds
if((round_ctr++ & 0xF) == 0)
{
globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
}
cl_uint results[0x100];
memset(results, 0, sizeof(cl_uint) * 0x100);
XMRRunJob(pGpuCtx, results, miner_algo, version);
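// Output buffer convention: slots 0..0xFE hold candidate nonces and
// results[0xFF] holds how many of them the kernel found below the target.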
for(size_t i = 0; i < results[0xFF]; i++)
{
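// Re-check each GPU candidate with the CPU implementation before submitting,
// so a defective kernel result is reported as invalid instead of being sent
// to the pool as a bad share.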
uint8_t bWorkBlob[112];
uint8_t bResult[32];
memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
memset(bResult, 0, sizeof(job_result::bResult));
*(uint32_t*)(bWorkBlob + 39) = results[i];
hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo), oWork.iPoolId));
else
executor::inst()->push_event(ex_event("AMD Invalid Result", pGpuCtx->deviceIdx, oWork.iPoolId));
}
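// Update the per-thread hashrate bookkeeping read by the executor/telemetry code.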
iCount += pGpuCtx->rawIntensity;
uint64_t iStamp = get_timestamp_ms();
iHashCount.store(iCount, std::memory_order_relaxed);
iTimestamp.store(iStamp, std::memory_order_relaxed);
std::this_thread::yield();
}
consume_work();
}
}
} // namespace amd
} // namespace xmrstak