summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend/amd/minethd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'xmrstak/backend/amd/minethd.cpp')
-rw-r--r--xmrstak/backend/amd/minethd.cpp237
1 files changed, 237 insertions, 0 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
new file mode 100644
index 0000000..f2f5ff4
--- /dev/null
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -0,0 +1,237 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Additional permission under GNU GPL version 3 section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining
+ * it with OpenSSL (or a modified version of that library), containing parts
+ * covered by the terms of OpenSSL License and SSLeay License, the licensors
+ * of this Program grant you additional permission to convey the resulting work.
+ *
+ */
+
+#include <assert.h>
+#include <cmath>
+#include <chrono>
+#include <thread>
+
+#include "../../ConfigEditor.hpp"
+#include "autoAdjust.hpp"
+
+#include <vector>
+#include "../../console.h"
+#include "../../crypto/cryptonight_aesni.h"
+#include "../cpu/minethd.h"
+#include "../../jconf.h"
+
+#include "../../executor.h"
+#include "minethd.h"
+#include "../../jconf.h"
+#include "../../crypto/cryptonight.h"
+#include "../../Environment.hpp"
+#include "../../Params.hpp"
+#include "amd_gpu/gpu.h"
+
+
+namespace xmrstak
+{
+namespace amd
+{
+
+/*
+ * Builds one AMD mining thread object and immediately launches its worker thread.
+ *
+ * pWork - initial work item, copied into this instance
+ * iNo   - thread number; stored narrowed to 8 bits
+ * ctx   - GPU context pointer this thread will use (the pointer is stored as is)
+ */
+minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
+{
+    // Bookkeeping state starts from zero.
+    iTimestamp = 0;
+    iHashCount = 0;
+    iJobNo = 0;
+    bQuit = 0;
+
+    // Per-thread inputs handed in by the caller.
+    pGpuCtx = ctx;
+    iThreadNo = static_cast<uint8_t>(iNo);
+    oWork = pWork;
+
+    // Launch last: work_main() runs on the new thread and reads the members set above.
+    oWorkThd = std::thread(&minethd::work_main, this);
+}
+
+extern "C" {
+/*
+ * C-linkage entry point of the AMD backend (marked dllexport when WIN32 is defined).
+ * Copies the caller's Environment into this module's Environment instance and then
+ * delegates to amd::minethd::thread_starter, returning whatever that call returns.
+ */
+#ifdef WIN32
+__declspec(dllexport)
+#endif
+std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, Environment& env)
+{
+    // Adopt the caller's environment before the threads are started.
+    Environment::inst() = env;
+
+    std::vector<IBackend*>* startedThreads = amd::minethd::thread_starter(threadOffset, pWork);
+    return startedThreads;
+}
+} // extern "C"
+
+/*
+ * Fills vGpuData with one entry per configured thread (device index, raw intensity and
+ * work size taken from the thread configuration) and hands the array to InitOpenCL.
+ *
+ * Returns true only when InitOpenCL reports ERR_SUCCESS.
+ */
+bool minethd::init_gpus()
+{
+    const size_t threadCount = jconf::inst()->GetThreadCount();
+
+    printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
+    vGpuData.resize(threadCount);
+
+    // The config object is reused for every thread, exactly one lookup per entry.
+    jconf::thd_cfg threadCfg;
+    for(size_t idx = 0; idx != threadCount; ++idx)
+    {
+        jconf::inst()->GetThreadConfig(idx, threadCfg);
+
+        GpuContext& gpu = vGpuData[idx];
+        gpu.deviceIdx = threadCfg.index;
+        gpu.rawIntensity = threadCfg.intensity;
+        gpu.workSize = threadCfg.w_size;
+    }
+
+    const auto status = InitOpenCL(vGpuData.data(), threadCount, jconf::inst()->GetPlatformIdx());
+    return status == ERR_SUCCESS;
+}
+
+// Storage for the per-thread GPU contexts; sized and populated by init_gpus().
+std::vector<GpuContext> minethd::vGpuData;
+
+/*
+ * Creates and starts one mining thread per configured GPU thread.
+ *
+ * Steps: generate a config via autoAdjust when the AMD config file does not exist,
+ * parse the configuration, initialise the GPUs, then construct a minethd for each
+ * configured thread and (optionally) pin it to a CPU.
+ *
+ * threadOffset - added to the local thread index to form each thread's number
+ * pWork        - initial work item passed to every created thread
+ *
+ * Returns a vector allocated with new (the raw pointer is handed to the caller).
+ * The vector is empty when the config could not be generated or parsed, or when
+ * GPU initialisation failed.
+ */
+std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
+{
+    std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>();
+
+    if(!ConfigEditor::file_exist(Params::inst().configFileAMD))
+    {
+        autoAdjust adjust;
+        if(!adjust.printConfig())
+            return pvThreads;
+    }
+
+    if(!jconf::inst()->parse_config())
+    {
+        win_exit();
+        // If win_exit() hands control back, never continue with an unparsed configuration.
+        return pvThreads;
+    }
+
+    // \todo get device count and exit if no opencl device
+
+    if(!init_gpus())
+    {
+        printer::inst()->print_msg(L1, "WARNING: AMD device not found");
+        return pvThreads;
+    }
+
+    size_t i, n = jconf::inst()->GetThreadCount();
+    pvThreads->reserve(n);
+
+    jconf::thd_cfg cfg;
+    for (i = 0; i < n; i++)
+    {
+        jconf::inst()->GetThreadConfig(i, cfg);
+        // The constructor already launches the worker thread for this GPU context.
+        minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]);
+
+        // A negative cpu_aff means "no affinity requested".
+        if(cfg.cpu_aff >= 0)
+        {
+#if defined(__APPLE__)
+            printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
+#endif
+            cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
+        }
+
+        pvThreads->push_back(thd);
+        if(cfg.cpu_aff >= 0)
+            printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
+        else
+            printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
+    }
+
+    return pvThreads;
+}
+
+/*
+ * Publishes a new work item to the shared global state.
+ *
+ * Blocks (polling every 100 ms) until the consume counter has reached the thread count,
+ * then stores the work, resets the consume counter and bumps the global job number.
+ */
+void minethd::switch_work(miner_work& pWork)
+{
+    // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
+    // faster than threads can consume it. This should never happen in real life.
+    // A pool can't physically send jobs faster than every 250ms or so due to net latency.
+    GlobalStates& states = GlobalStates::inst();
+
+    for(;;)
+    {
+        if(!(states.iConsumeCnt.load(std::memory_order_seq_cst) < states.iThreadCount))
+            break;
+
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    // Order matters: work first, then the counter reset, the job number last.
+    states.oGlobalWork = pWork;
+    states.iConsumeCnt.store(0, std::memory_order_seq_cst);
+    states.iGlobalJobNo++;
+}
+
+/*
+ * Takes over the globally published work item: copies it into this thread's oWork,
+ * advances the local job number and increments the shared consume counter.
+ */
+void minethd::consume_work()
+{
+    GlobalStates& states = GlobalStates::inst();
+
+    // Raw byte copy of the shared work item into this thread's private copy.
+    memcpy(&oWork, &states.oGlobalWork, sizeof(miner_work));
+    ++iJobNo;
+
+    // Counted by switch_work(), which waits on this counter before publishing again.
+    ++states.iConsumeCnt;
+}
+
+/*
+ * Worker loop of one AMD GPU mining thread; runs until bQuit becomes non-zero.
+ *
+ * While the current work item is marked as stalled, the thread polls for a new global job.
+ * Otherwise it programs the GPU with the job and keeps calling XMRRunJob until the global
+ * job number changes. Every nonce reported by the GPU is re-hashed with the CPU hash
+ * function and only pushed to the executor when the hash is below the job target;
+ * otherwise a result error is logged. The hash count and a millisecond timestamp are
+ * published after every GPU round.
+ */
+void minethd::work_main()
+{
+    // The job id is copied between these two structures, so their buffers must match in size.
+    static_assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID),
+        "job_result::sJobID and pool_job::sJobID must have the same size");
+
+    // Layout of the result buffer as used below: the last slot carries the number of
+    // results, the slots in front of it carry the nonces.
+    const size_t iResultSlots = 0x100;
+    const size_t iCountSlot = iResultSlots - 1;
+
+    uint64_t iCount = 0;
+
+    // CPU hashing context used to verify the GPU results.
+    // NOTE(review): the context is never released in this function - confirm whether
+    // that is intended.
+    cryptonight_ctx* cpu_ctx;
+    cpu_ctx = cpu::minethd::minethd_alloc_ctx();
+    cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
+
+    // Count this thread as ready for the first switch_work() call.
+    GlobalStates::inst().iConsumeCnt++;
+
+    while (bQuit == 0)
+    {
+        if (oWork.bStall)
+        {
+            /* We are stalled here because the executor didn't find a job for us yet,
+               either because of network latency, or a socket problem. Since we are the
+               raison d'etre of this software it is sensible to just wait until we have something */
+
+            while (GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+                std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+            consume_work();
+            continue;
+        }
+
+        pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+        uint32_t target = oWork.iTarget32;
+        XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
+
+        // Keep hashing the same job until a new one is published.
+        while(GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+        {
+            cl_uint results[iResultSlots];
+            memset(results, 0, sizeof(results));
+
+            XMRRunJob(pGpuCtx, results);
+
+            // Never trust the reported count beyond the number of nonce slots that exist;
+            // an oversized count would otherwise index past the nonce slots and the array.
+            size_t iResultCnt = results[iCountSlot];
+            if(iResultCnt > iCountSlot)
+                iResultCnt = iCountSlot;
+
+            for(size_t i = 0; i < iResultCnt; i++)
+            {
+                uint8_t bWorkBlob[112];
+                uint8_t bResult[32];
+
+                memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
+                memset(bResult, 0, sizeof(bResult));
+
+                // Patch the candidate nonce into the blob at byte offset 39. memcpy is used
+                // because that offset is not suitably aligned for a direct uint32_t store.
+                const uint32_t iNonce = results[i];
+                memcpy(bWorkBlob + 39, &iNonce, sizeof(iNonce));
+
+                hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
+
+                // The last 8 bytes of the hash are compared against the 64 bit target.
+                uint64_t iHashTail;
+                memcpy(&iHashTail, bResult + 24, sizeof(iHashTail));
+
+                if (iHashTail < oWork.iTarget)
+                    executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId));
+                else
+                    executor::inst()->log_result_error("AMD Invalid Result");
+            }
+
+            // Publish progress: one GPU round accounts for rawIntensity hashes.
+            iCount += pGpuCtx->rawIntensity;
+            using namespace std::chrono;
+            uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
+            iHashCount.store(iCount, std::memory_order_relaxed);
+            iTimestamp.store(iStamp, std::memory_order_relaxed);
+            std::this_thread::yield();
+        }
+
+        consume_work();
+    }
+}
+
+} // namespace amd
+} // namespace xmrstak
OpenPOWER on IntegriCloud