summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend/nvidia
diff options
context:
space:
mode:
authorfireice-uk <fireice-uk@users.noreply.github.com>2017-11-15 18:07:24 +0000
committerGitHub <noreply@github.com>2017-11-15 18:07:24 +0000
commit92dcd340d104c3d7ef8e5b386041f1a40184577a (patch)
tree599ff12cd9fc8401b61d67d2242eecb8f4a321d5 /xmrstak/backend/nvidia
parent6d6c1853939895277a9eeb91b96bdf6d17b813b2 (diff)
parent70737c8d1a909c6d08f6e38e069566ae8af64917 (diff)
downloadxmr-stak-92dcd340d104c3d7ef8e5b386041f1a40184577a.zip
xmr-stak-92dcd340d104c3d7ef8e5b386041f1a40184577a.tar.gz
Merge pull request #116 from psychocrypt/topic-checkIfBinarySupportsGPUArch
check gpu architecture
Diffstat (limited to 'xmrstak/backend/nvidia')
-rw-r--r--xmrstak/backend/nvidia/minethd.cpp2
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu48
2 files changed, 49 insertions, 1 deletions
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 5170305..4d9efd2 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -220,7 +220,7 @@ void minethd::work_main()
globalStates::inst().iConsumeCnt++;
- if(/*cuda_get_deviceinfo(&ctx) != 1 ||*/ cryptonight_extra_cpu_init(&ctx) != 1)
+ if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
{
printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
std::exit(0);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 61d45ed..9923cb2 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -1,6 +1,9 @@
#include <stdio.h>
#include <stdint.h>
#include <string.h>
+#include <sstream>
+#include <algorithm>
+#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_functions.hpp>
@@ -277,6 +280,7 @@ extern "C" int cuda_get_devicecount( int* deviceCount)
* 2 = gpu cannot be selected,
* 3 = context cannot be created
* 4 = not enough memory
+ * 5 = architecture not supported (not compiled for the gpu architecture)
*/
extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
{
@@ -321,8 +325,52 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
ctx->device_arch[0] = props.major;
ctx->device_arch[1] = props.minor;
+ const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
ctx->name = std::string(props.name);
+ std::vector<int> arch;
+#define XMRSTAK_PP_TOSTRING1(str) #str
+#define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)
+ char const * archStringList = XMRSTAK_PP_TOSTRING(XMRSTAK_CUDA_ARCH_LIST);
+#undef XMRSTAK_PP_TOSTRING
+#undef XMRSTAK_PP_TOSTRING1
+ std::stringstream ss(archStringList);
+
+ //transform string list separated with `+` into a vector of integers
+ int tmpArch;
+ while ( ss >> tmpArch )
+ arch.push_back( tmpArch );
+
+ if(gpuArch >= 20 && gpuArch < 30)
+ {
+ // compiled binary must support sm_20 for fermi
+ std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
+ if(it == arch.end())
+ {
+ printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+ return 5;
+ }
+ }
+ if(gpuArch >= 30)
+ {
+ // search the minimum architecture greater than sm_20
+ int minSupportedArch = 0;
+ /* - for architectures newer than fermi we need at least sm_30
+ * or an architecture >= gpuArch
+ * - it is not possible to use a gpu with an architecture >= 30
+ * with a binary compiled only for sm_20
+ */
+ for(int i = 0; i < arch.size(); ++i)
+ if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch))
+ minSupportedArch = arch[i];
+ if(minSupportedArch >= 30 && gpuArch <= minSupportedArch)
+ {
+ printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+ return 5;
+ }
+ }
+
// set all device options marked as auto (-1) to a valid value
if(ctx->device_blocks == -1)
{
OpenPOWER on IntegriCloud