summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpsychocrypt <psychocrypt@users.noreply.github.com>2017-12-01 21:37:11 +0100
committerpsychocrypt <psychocrypt@users.noreply.github.com>2017-12-01 21:37:11 +0100
commit4dca64c1a9ffcb506ad81720951f7536b70e394e (patch)
tree9b19e206ceaf43dc9ec7cfac0facc0636d9e9f4e
parent2920e9a3227da307b04ee23ecc5c63ecee4a224c (diff)
downloadxmr-stak-4dca64c1a9ffcb506ad81720951f7536b70e394e.zip
xmr-stak-4dca64c1a9ffcb506ad81720951f7536b70e394e.tar.gz
option to control GPU synchronization
- add option `sync_mode` - update auto suggestion and jconf
-rw-r--r--xmrstak/backend/nvidia/autoAdjust.hpp2
-rw-r--r--xmrstak/backend/nvidia/config.tpl10
-rw-r--r--xmrstak/backend/nvidia/jconf.cpp11
-rw-r--r--xmrstak/backend/nvidia/jconf.hpp1
-rw-r--r--xmrstak/backend/nvidia/minethd.cpp1
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp3
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu17
7 files changed, 39 insertions, 6 deletions
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index c6a7dca..be7d1ce 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -95,7 +95,7 @@ private:
conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
" \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
" \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
- " \"affine_to_cpu\" : false,\n" +
+ " \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" +
" },\n";
}
}
diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl
index 99dc023..5479172 100644
--- a/xmrstak/backend/nvidia/config.tpl
+++ b/xmrstak/backend/nvidia/config.tpl
@@ -9,6 +9,12 @@ R"===(
* bsleep - Insert a delay of X microseconds between kernel launches.
* Increase if you want to reduce GPU lag. Recommended setting on GUI systems - 100
* affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner.
+ * sync_mode - method used to synchronize the device
+ * documentation: http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac
+ * 0 = cudaDeviceScheduleAuto
+ * 1 = cudaDeviceScheduleSpin - create a high load on one cpu thread per gpu
+ * 2 = cudaDeviceScheduleYield
+ * 3 = cudaDeviceScheduleBlockingSync (default)
*
* On the first run the miner will look at your system and suggest a basic configuration that will work,
* you can try to tweak it from there to get the best performance.
@@ -16,7 +22,9 @@ R"===(
* A filled out configuration should look like this:
* "gpu_threads_conf" :
* [
- * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false},
+ * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
+ * "affine_to_cpu" : false, "sync_mode" : 3,
+ * },
* ],
*/
diff --git a/xmrstak/backend/nvidia/jconf.cpp b/xmrstak/backend/nvidia/jconf.cpp
index 4208145..46c5726 100644
--- a/xmrstak/backend/nvidia/jconf.cpp
+++ b/xmrstak/backend/nvidia/jconf.cpp
@@ -123,16 +123,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg)
if(!oThdConf.IsObject())
return false;
- const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff;
+ const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff, *syncMode;
gid = GetObjectMember(oThdConf, "index");
blocks = GetObjectMember(oThdConf, "blocks");
threads = GetObjectMember(oThdConf, "threads");
bfactor = GetObjectMember(oThdConf, "bfactor");
bsleep = GetObjectMember(oThdConf, "bsleep");
aff = GetObjectMember(oThdConf, "affine_to_cpu");
+ syncMode = GetObjectMember(oThdConf, "sync_mode");
if(gid == nullptr || blocks == nullptr || threads == nullptr ||
- bfactor == nullptr || bsleep == nullptr || aff == nullptr)
+ bfactor == nullptr || bsleep == nullptr || aff == nullptr || syncMode == nullptr)
{
return false;
}
@@ -155,11 +156,17 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg)
if(!aff->IsUint64() && !aff->IsBool())
return false;
+ if(!syncMode->IsNumber() || syncMode->GetInt() < 0 || syncMode->GetInt() > 3)
+ {
+ printer::inst()->print_msg(L0, "Error NVIDIA: sync_mode out of range or no number. ( range: 0 <= sync_mode < 4.)");
+ return false;
+ }
cfg.id = gid->GetInt();
cfg.blocks = blocks->GetInt();
cfg.threads = threads->GetInt();
cfg.bfactor = bfactor->GetInt();
cfg.bsleep = bsleep->GetInt();
+ cfg.syncMode = syncMode->GetInt();
if(aff->IsNumber())
cfg.cpu_aff = aff->GetInt();
diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp
index b09a162..7f60f1d 100644
--- a/xmrstak/backend/nvidia/jconf.hpp
+++ b/xmrstak/backend/nvidia/jconf.hpp
@@ -28,6 +28,7 @@ public:
bool bDoubleMode;
bool bNoPrefetch;
int32_t cpu_aff;
+ int syncMode;
long long iCpuAff;
};
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 9eab1c0..6e628fd 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -77,6 +77,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
ctx.device_threads = (int)cfg.threads;
ctx.device_bfactor = (int)cfg.bfactor;
ctx.device_bsleep = (int)cfg.bsleep;
+ ctx.syncMode = cfg.syncMode;
this->affinity = cfg.cpu_aff;
std::unique_lock<std::mutex> lck(thd_aff_set);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index 1b63379..afbdbaf 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -11,7 +11,8 @@ typedef struct {
int device_blocks;
int device_threads;
int device_bfactor;
- int device_bsleep;
+ int device_bsleep;
+ int syncMode;
uint32_t *d_input;
uint32_t inputlen;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 333ae73..0fc99a4 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -189,7 +189,22 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
}
cudaDeviceReset();
- cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ switch(ctx->syncMode)
+ {
+ case 0:
+ cudaSetDeviceFlags(cudaDeviceScheduleAuto);
+ break;
+ case 1:
+ cudaSetDeviceFlags(cudaDeviceScheduleSpin);
+ break;
+ case 2:
+ cudaSetDeviceFlags(cudaDeviceScheduleYield);
+ break;
+ case 3:
+ cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ break;
+
+ };
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
size_t hashMemSize;
OpenPOWER on IntegriCloud