summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend
diff options
context:
space:
mode:
Diffstat (limited to 'xmrstak/backend')
-rw-r--r--xmrstak/backend/amd/minethd.cpp6
-rw-r--r--xmrstak/backend/amd/minethd.hpp1
-rw-r--r--xmrstak/backend/cpu/minethd.cpp7
-rw-r--r--xmrstak/backend/cpu/minethd.hpp1
-rw-r--r--xmrstak/backend/nvidia/autoAdjust.hpp13
-rw-r--r--xmrstak/backend/nvidia/minethd.cpp8
-rw-r--r--xmrstak/backend/nvidia/minethd.hpp1
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu91
8 files changed, 112 insertions, 16 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index ce62565..de0b6e8 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -59,6 +59,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::th
pGpuCtx = ctx;
this->affinity = cfg.cpu_aff;
+ std::unique_lock<std::mutex> lck(thd_aff_set);
std::future<void> order_guard = order_fix.get_future();
oWorkThd = std::thread(&minethd::work_main, this);
@@ -180,6 +181,9 @@ void minethd::work_main()
bindMemoryToNUMANode(affinity);
order_fix.set_value();
+ std::unique_lock<std::mutex> lck(thd_aff_set);
+ lck.release();
+ std::this_thread::yield();
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
@@ -239,7 +243,7 @@ void minethd::work_main()
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo), oWork.iPoolId));
else
- executor::inst()->log_result_error("AMD Invalid Result");
+ executor::inst()->push_event(ex_event("AMD Invalid Result", oWork.iPoolId));
}
iCount += pGpuCtx->rawIntensity;
diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp
index c4c2ce6..c808192 100644
--- a/xmrstak/backend/amd/minethd.hpp
+++ b/xmrstak/backend/amd/minethd.hpp
@@ -38,6 +38,7 @@ private:
miner_work oWork;
std::promise<void> order_fix;
+ std::mutex thd_aff_set;
std::thread oWorkThd;
int64_t affinity;
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 8b0eec1..03071c4 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -102,6 +102,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, bool double_work, bool no_prefet
bNoPrefetch = no_prefetch;
this->affinity = affinity;
+ std::unique_lock<std::mutex> lck(thd_aff_set);
std::future<void> order_guard = order_fix.get_future();
if(double_work)
@@ -340,6 +341,9 @@ void minethd::work_main()
bindMemoryToNUMANode(affinity);
order_fix.set_value();
+ std::unique_lock<std::mutex> lck(thd_aff_set);
+ lck.release();
+ std::this_thread::yield();
cn_hash_fun hash_fun;
cryptonight_ctx* ctx;
@@ -468,6 +472,9 @@ void minethd::double_work_main()
bindMemoryToNUMANode(affinity);
order_fix.set_value();
+ std::unique_lock<std::mutex> lck(thd_aff_set);
+ lck.release();
+ std::this_thread::yield();
cn_hash_fun_dbl hash_fun;
cryptonight_ctx* ctx0;
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 670ec8d..5520d9e 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -46,6 +46,7 @@ private:
miner_work oWork;
std::promise<void> order_fix;
+ std::mutex thd_aff_set;
std::thread oWorkThd;
int64_t affinity;
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index bf04518..d36a46a 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -60,17 +60,15 @@ public:
ctx.device_bfactor = 6;
ctx.device_bsleep = 25;
#endif
- if( cuda_get_deviceinfo(&ctx) != 1 )
- {
- printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", i);
- std::exit(0);
- }
- nvidCtxVec.push_back(ctx);
+ if(cuda_get_deviceinfo(&ctx) == 0)
+ nvidCtxVec.push_back(ctx);
+ else
+ printer::inst()->print_msg(L0, "WARNING: NVIDIA setup failed for GPU %d.\n", i);
}
generateThreadConfig();
- return true;
+ return true;
}
@@ -94,6 +92,7 @@ private:
{
conf += std::string(" // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n";
conf += std::string(" // memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/" + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n";
+ conf += std::string(" // smx: ") + std::to_string(ctx.device_mpcount) + "\n";
conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
" \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
" \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index e15fc56..9dbd83e 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -79,6 +79,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
ctx.device_bsleep = (int)cfg.bsleep;
this->affinity = cfg.cpu_aff;
+ std::unique_lock<std::mutex> lck(thd_aff_set);
std::future<void> order_guard = order_fix.get_future();
oWorkThd = std::thread(&minethd::work_main, this);
@@ -207,6 +208,9 @@ void minethd::work_main()
bindMemoryToNUMANode(affinity);
order_fix.set_value();
+ std::unique_lock<std::mutex> lck(thd_aff_set);
+ lck.release();
+ std::this_thread::yield();
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
@@ -216,7 +220,7 @@ void minethd::work_main()
globalStates::inst().iConsumeCnt++;
- if(/*cuda_get_deviceinfo(&ctx) != 1 ||*/ cryptonight_extra_cpu_init(&ctx) != 1)
+ if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
{
printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
std::exit(0);
@@ -281,7 +285,7 @@ void minethd::work_main()
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult, iThreadNo), oWork.iPoolId));
else
- executor::inst()->log_result_error("NVIDIA Invalid Result");
+ executor::inst()->push_event(ex_event("NVIDIA Invalid Result", oWork.iPoolId));
}
iCount += h_per_round;
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index f6d989c..ffcdab1 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -45,6 +45,7 @@ private:
miner_work oWork;
std::promise<void> order_fix;
+ std::mutex thd_aff_set;
std::thread oWorkThd;
int64_t affinity;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index e18532f..9923cb2 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -1,6 +1,9 @@
#include <stdio.h>
#include <stdint.h>
#include <string.h>
+#include <sstream>
+#include <algorithm>
+#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_functions.hpp>
@@ -270,6 +273,15 @@ extern "C" int cuda_get_devicecount( int* deviceCount)
return 1;
}
+/** get device information
+ *
+ * @return 0 = all OK,
+ * 1 = something went wrong,
+ * 2 = gpu cannot be selected,
+ * 3 = context cannot be created
+ * 4 = not enough memory
+ * 5 = architecture not supported (not compiled for the gpu architecture)
+ */
extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
{
cudaError_t err;
@@ -279,25 +291,25 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
if(err != cudaSuccess)
{
printf("Unable to query CUDA driver version! Is an nVidia driver installed?\n");
- return 0;
+ return 1;
}
if(version < CUDART_VERSION)
{
printf("Driver does not support CUDA %d.%d API! Update your nVidia driver!\n", CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10);
- return 0;
+ return 1;
}
int GPU_N;
if(cuda_get_devicecount(&GPU_N) == 0)
{
- return 0;
+ return 1;
}
if(ctx->device_id >= GPU_N)
{
printf("Invalid device ID!\n");
- return 0;
+ return 1;
}
cudaDeviceProp props;
@@ -305,7 +317,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
if(err != cudaSuccess)
{
printf("\nGPU %d: %s\n%s line %d\n", ctx->device_id, cudaGetErrorString(err), __FILE__, __LINE__);
- return 0;
+ return 1;
}
ctx->device_name = strdup(props.name);
@@ -313,8 +325,52 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
ctx->device_arch[0] = props.major;
ctx->device_arch[1] = props.minor;
+ const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
ctx->name = std::string(props.name);
+ std::vector<int> arch;
+#define XMRSTAK_PP_TOSTRING1(str) #str
+#define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)
+ char const * archStringList = XMRSTAK_PP_TOSTRING(XMRSTAK_CUDA_ARCH_LIST);
+#undef XMRSTAK_PP_TOSTRING
+#undef XMRSTAK_PP_TOSTRING1
+ std::stringstream ss(archStringList);
+
+ //transform string list sperated with `+` into a vector of integers
+ int tmpArch;
+ while ( ss >> tmpArch )
+ arch.push_back( tmpArch );
+
+ if(gpuArch >= 20 && gpuArch < 30)
+ {
+ // compiled binary must support sm_20 for fermi
+ std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
+ if(it == arch.end())
+ {
+ printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+ return 5;
+ }
+ }
+ if(gpuArch >= 30)
+ {
+ // search the minimum architecture greater than sm_20
+ int minSupportedArch = 0;
+ /* - for newer architecture than fermi we need at least sm_30
+ * or a architecture >= gpuArch
+ * - it is not possible to use a gpu with a architecture >= 30
+ * with a sm_20 only compiled binary
+ */
+ for(int i = 0; i < arch.size(); ++i)
+ if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch))
+ minSupportedArch = arch[i];
+ if(minSupportedArch >= 30 && gpuArch <= minSupportedArch)
+ {
+ printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+ return 5;
+ }
+ }
+
// set all evice option those marked as auto (-1) to a valid value
if(ctx->device_blocks == -1)
{
@@ -347,9 +403,31 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
maxMemUsage = size_t(1024u) * byteToMiB;
}
+ int* tmp;
+ cudaError_t err;
+ // a device must be selected to get the right memory usage later on
+ err = cudaSetDevice(ctx->device_id);
+ if(err != cudaSuccess)
+ {
+ printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
+ return 2;
+ }
+ // trigger that a context on the gpu will be allocated
+ err = cudaMalloc(&tmp, 256);
+ if(err != cudaSuccess)
+ {
+ printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
+ return 3;
+ }
+
+
size_t freeMemory = 0;
size_t totalMemory = 0;
CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
+
+ cudaFree(tmp);
+ // delete created context on the gpu
+ cudaDeviceReset();
ctx->total_device_memory = totalMemory;
ctx->free_device_memory = freeMemory;
@@ -379,6 +457,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
printf("WARNING: NVIDIA GPU %d: already %s MiB memory in use, skip GPU.\n",
ctx->device_id,
std::to_string(usedMem/byteToMiB).c_str());
+ return 4;
}
else
maxMemUsage -= usedMem;
@@ -404,5 +483,5 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
}
- return 1;
+ return 0;
}
OpenPOWER on IntegriCloud