summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpsychocrypt <psychocrypt@users.noreply.github.com>2017-12-07 22:29:07 +0100
committerpsychocrypt <psychocrypt@users.noreply.github.com>2017-12-07 22:29:07 +0100
commite5127b5ffac67faf74b0e27e6709e0615f4bfa39 (patch)
tree8865199d82f891fda7c7f039d63cf92d43fdf135
parent035c824258f678a6865c69ef1a8b769549937662 (diff)
downloadxmr-stak-e5127b5ffac67faf74b0e27e6709e0615f4bfa39.zip
xmr-stak-e5127b5ffac67faf74b0e27e6709e0615f4bfa39.tar.gz
add message to `CUDA_CHECK...` macros
- add macro `CUDA_CHECK_MSG_KERNEL` and `CUDA_CHECK_MSG` - add suggestions for how typical errors can be solved
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_core.cu28
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp37
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu11
3 files changed, 53 insertions, 23 deletions
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index dba6676..0b175b5 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -327,18 +327,22 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
for ( int i = 0; i < partcount; i++ )
{
- CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
- grid,
- block4,
- block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
- >>>(
- ctx->device_blocks*ctx->device_threads,
- ctx->device_bfactor,
- i,
- ctx->d_long_state,
- ctx->d_ctx_a,
- ctx->d_ctx_b
- ));
+ CUDA_CHECK_MSG_KERNEL(
+ ctx->device_id,
+ "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**",
+ cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
+ grid,
+ block4,
+ block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
+ >>>(
+ ctx->device_blocks*ctx->device_threads,
+ ctx->device_bfactor,
+ i,
+ ctx->d_long_state,
+ ctx->d_ctx_a,
+ ctx->d_ctx_b
+ )
+ );
if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
}
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
index 078c165..563bb3b 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp
@@ -9,22 +9,41 @@
/** execute and check a CUDA api command
*
* @param id gpu id (thread id)
+ * @param msg message string which should be added to the error message
* @param ... CUDA api command
*/
-#define CUDA_CHECK(id, ...) { \
- cudaError_t error = __VA_ARGS__; \
- if(error!=cudaSuccess){ \
- std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__ << std::endl; \
- throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
- } \
-} \
+#define CUDA_CHECK_MSG(id, msg, ...) { \
+ cudaError_t error = __VA_ARGS__; \
+ if(error!=cudaSuccess){ \
+ std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__; \
+ std::cerr << msg << std::endl; \
+ throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
+ } \
+} \
( (void) 0 )
+/** execute and check a CUDA api command
+ *
+ * @param id gpu id (thread id)
+ * @param ... CUDA api command
+ */
+#define CUDA_CHECK(id, ...) CUDA_CHECK_MSG(id, "", __VA_ARGS__)
+
/** execute and check a CUDA kernel
*
* @param id gpu id (thread id)
* @param ... CUDA kernel call
*/
-#define CUDA_CHECK_KERNEL(id, ...) \
- __VA_ARGS__; \
+#define CUDA_CHECK_KERNEL(id, ...) \
+ __VA_ARGS__; \
CUDA_CHECK(id, cudaGetLastError())
+
+/** execute and check a CUDA kernel
+ *
+ * @param id gpu id (thread id)
+ * @param msg message string which should be added to the error message
+ * @param ... CUDA kernel call
+ */
+#define CUDA_CHECK_MSG_KERNEL(id, msg, ...) \
+ __VA_ARGS__; \
+ CUDA_CHECK_MSG(id, msg, cudaGetLastError())
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 333ae73..bea2634 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -203,7 +203,6 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
}
size_t wsize = ctx->device_blocks * ctx->device_threads;
- CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
@@ -213,6 +212,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) ));
+ CUDA_CHECK_MSG(
+ ctx->device_id,
+ "\n**suggestion: Try to reduce the value of the attribute 'threads' in the NVIDIA config file.**",
+ cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
return 1;
}
@@ -239,7 +242,11 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce,
CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) ));
- CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state ));
+ CUDA_CHECK_MSG_KERNEL(
+ ctx->device_id,
+ "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
+ cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state )
+ );
CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
OpenPOWER on IntegriCloud