summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend
diff options
context:
space:
mode:
Diffstat (limited to 'xmrstak/backend')
-rw-r--r--xmrstak/backend/nvidia/autoAdjust.hpp2
-rw-r--r--xmrstak/backend/nvidia/config.tpl10
-rw-r--r--xmrstak/backend/nvidia/jconf.cpp11
-rw-r--r--xmrstak/backend/nvidia/jconf.hpp1
-rw-r--r--xmrstak/backend/nvidia/minethd.cpp1
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp3
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_core.cu28
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp37
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu28
9 files changed, 92 insertions, 29 deletions
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index c6a7dca..be7d1ce 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -95,7 +95,7 @@ private:
conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
" \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
" \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
- " \"affine_to_cpu\" : false,\n" +
+ " \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" +
" },\n";
}
}
diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl
index 99dc023..5479172 100644
--- a/xmrstak/backend/nvidia/config.tpl
+++ b/xmrstak/backend/nvidia/config.tpl
@@ -9,6 +9,12 @@ R"===(
* bsleep - Insert a delay of X microseconds between kernel launches.
* Increase if you want to reduce GPU lag. Recommended setting on GUI systems - 100
* affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner.
+ * sync_mode - method used to synchronize the device
+ * documentation: http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac
+ * 0 = cudaDeviceScheduleAuto
* 1 = cudaDeviceScheduleSpin - creates a high load on one CPU thread per GPU
+ * 2 = cudaDeviceScheduleYield
+ * 3 = cudaDeviceScheduleBlockingSync (default)
*
* On the first run the miner will look at your system and suggest a basic configuration that will work,
* you can try to tweak it from there to get the best performance.
@@ -16,7 +22,9 @@ R"===(
* A filled out configuration should look like this:
* "gpu_threads_conf" :
* [
- * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false},
+ * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
+ * "affine_to_cpu" : false, "sync_mode" : 3,
+ * },
* ],
*/
diff --git a/xmrstak/backend/nvidia/jconf.cpp b/xmrstak/backend/nvidia/jconf.cpp
index 4208145..46c5726 100644
--- a/xmrstak/backend/nvidia/jconf.cpp
+++ b/xmrstak/backend/nvidia/jconf.cpp
@@ -123,16 +123,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg)
if(!oThdConf.IsObject())
return false;
- const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff;
+ const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff, *syncMode;
gid = GetObjectMember(oThdConf, "index");
blocks = GetObjectMember(oThdConf, "blocks");
threads = GetObjectMember(oThdConf, "threads");
bfactor = GetObjectMember(oThdConf, "bfactor");
bsleep = GetObjectMember(oThdConf, "bsleep");
aff = GetObjectMember(oThdConf, "affine_to_cpu");
+ syncMode = GetObjectMember(oThdConf, "sync_mode");
if(gid == nullptr || blocks == nullptr || threads == nullptr ||
- bfactor == nullptr || bsleep == nullptr || aff == nullptr)
+ bfactor == nullptr || bsleep == nullptr || aff == nullptr || syncMode == nullptr)
{
return false;
}
@@ -155,11 +156,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg)
if(!aff->IsUint64() && !aff->IsBool())
return false;
+ if(!syncMode->IsNumber() || syncMode->GetInt() < 0 || syncMode->GetInt() > 3)
+ {
printer::inst()->print_msg(L0, "Error NVIDIA: sync_mode is out of range or not a number (valid range: 0 <= sync_mode < 4).");
+ return false;
+ }
cfg.id = gid->GetInt();
cfg.blocks = blocks->GetInt();
cfg.threads = threads->GetInt();
cfg.bfactor = bfactor->GetInt();
cfg.bsleep = bsleep->GetInt();
+ cfg.syncMode = syncMode->GetInt();
if(aff->IsNumber())
cfg.cpu_aff = aff->GetInt();
diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp
index b09a162..7f60f1d 100644
--- a/xmrstak/backend/nvidia/jconf.hpp
+++ b/xmrstak/backend/nvidia/jconf.hpp
@@ -28,6 +28,7 @@ public:
bool bDoubleMode;
bool bNoPrefetch;
int32_t cpu_aff;
+ int syncMode;
long long iCpuAff;
};
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 9eab1c0..6e628fd 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -77,6 +77,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
ctx.device_threads = (int)cfg.threads;
ctx.device_bfactor = (int)cfg.bfactor;
ctx.device_bsleep = (int)cfg.bsleep;
+ ctx.syncMode = cfg.syncMode;
this->affinity = cfg.cpu_aff;
std::unique_lock<std::mutex> lck(thd_aff_set);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index 1b63379..afbdbaf 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -11,7 +11,8 @@ typedef struct {
int device_blocks;
int device_threads;
int device_bfactor;
- int device_bsleep;
+ int device_bsleep;
+ int syncMode;
uint32_t *d_input;
uint32_t inputlen;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index dba6676..0b175b5 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -327,18 +327,22 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
for ( int i = 0; i < partcount; i++ )
{
- CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
- grid,
- block4,
- block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
- >>>(
- ctx->device_blocks*ctx->device_threads,
- ctx->device_bfactor,
- i,
- ctx->d_long_state,
- ctx->d_ctx_a,
- ctx->d_ctx_b
- ));
+ CUDA_CHECK_MSG_KERNEL(
+ ctx->device_id,
+ "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**",
+ cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
+ grid,
+ block4,
+ block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
+ >>>(
+ ctx->device_blocks*ctx->device_threads,
+ ctx->device_bfactor,
+ i,
+ ctx->d_long_state,
+ ctx->d_ctx_a,
+ ctx->d_ctx_b
+ )
+ );
if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
}
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
index 078c165..563bb3b 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
@@ -9,22 +9,41 @@
/** execute and check a CUDA api command
*
* @param id gpu id (thread id)
+ * @param msg message string that is appended to the error message
* @param ... CUDA api command
*/
-#define CUDA_CHECK(id, ...) { \
- cudaError_t error = __VA_ARGS__; \
- if(error!=cudaSuccess){ \
- std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__ << std::endl; \
- throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
- } \
-} \
+#define CUDA_CHECK_MSG(id, msg, ...) { \
+ cudaError_t error = __VA_ARGS__; \
+ if(error!=cudaSuccess){ \
+ std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__; \
+ std::cerr << msg << std::endl; \
+ throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
+ } \
+} \
( (void) 0 )
+/** execute and check a CUDA api command
+ *
+ * @param id gpu id (thread id)
+ * @param ... CUDA api command
+ */
+#define CUDA_CHECK(id, ...) CUDA_CHECK_MSG(id, "", __VA_ARGS__)
+
/** execute and check a CUDA kernel
*
* @param id gpu id (thread id)
* @param ... CUDA kernel call
*/
-#define CUDA_CHECK_KERNEL(id, ...) \
- __VA_ARGS__; \
+#define CUDA_CHECK_KERNEL(id, ...) \
+ __VA_ARGS__; \
CUDA_CHECK(id, cudaGetLastError())
+
+/** execute and check a CUDA kernel
+ *
+ * @param id gpu id (thread id)
+ * @param msg message string which should be added to the error message
+ * @param ... CUDA kernel call
+ */
+#define CUDA_CHECK_MSG_KERNEL(id, msg, ...) \
+ __VA_ARGS__; \
+ CUDA_CHECK_MSG(id, msg, cudaGetLastError())
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 333ae73..492201d 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -189,7 +189,22 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
}
cudaDeviceReset();
- cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ switch(ctx->syncMode)
+ {
+ case 0:
+ cudaSetDeviceFlags(cudaDeviceScheduleAuto);
+ break;
+ case 1:
+ cudaSetDeviceFlags(cudaDeviceScheduleSpin);
+ break;
+ case 2:
+ cudaSetDeviceFlags(cudaDeviceScheduleYield);
+ break;
+ case 3:
+ cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ break;
+
+ };
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
size_t hashMemSize;
@@ -203,7 +218,6 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
}
size_t wsize = ctx->device_blocks * ctx->device_threads;
- CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
@@ -213,6 +227,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) ));
+ CUDA_CHECK_MSG(
+ ctx->device_id,
+ "\n**suggestion: Try to reduce the value of the attribute 'threads' in the NVIDIA config file.**",
+ cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
return 1;
}
@@ -239,7 +257,11 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce,
CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) ));
- CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state ));
+ CUDA_CHECK_MSG_KERNEL(
+ ctx->device_id,
+ "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
+ cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state )
+ );
CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
OpenPOWER on IntegriCloud