summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/FAQ.md11
-rw-r--r--doc/compile_Linux.md2
-rw-r--r--xmrstak/backend/amd/amd_gpu/gpu.cpp2
-rw-r--r--xmrstak/backend/amd/autoAdjust.hpp3
-rw-r--r--xmrstak/backend/amd/jconf.cpp7
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_core.cu12
-rw-r--r--xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu8
7 files changed, 37 insertions, 8 deletions
diff --git a/doc/FAQ.md b/doc/FAQ.md
index 641a50d..ffbc36f 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -41,13 +41,18 @@ Download and install this [runtime package](https://go.microsoft.com/fwlink/?Lin
## Error: MEMORY ALLOC FAILED: mmap failed
-On Linux you will need to configure large page support `sudo sysctl -w vm.nr_hugepages=128` and increase your
-ulimit -l. To do this you need to add following lines to /etc/security/limits.conf:
+On Linux you will need to configure large page support and increase your ulimit -l.
+
+To set large page support, add the following lines to /etc/sysctl.conf:
+
+ vm.nr_hugepages=128
+
+To increase the ulimit, add the following lines to /etc/security/limits.conf:
* soft memlock 262144
* hard memlock 262144
-Save file. You WILL need to log out and log back in for these settings to take affect on your user (no need to reboot, just relogin in your session).
+You WILL need to log out and log back in for these settings to take effect for your user (no need to reboot, just log in to your session again).
You can also do it Windows-style and simply run-as-root, but this is NOT recommended for security reasons.
diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md
index 8d48e72..729f4d2 100644
--- a/doc/compile_Linux.md
+++ b/doc/compile_Linux.md
@@ -8,7 +8,7 @@
### Cuda 8.0+ (only needed to use NVIDIA GPUs)
-- donwload and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads)
+- download and install [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads)
- for minimal install choose `Custom installation options` during the install and select
- CUDA/Development
- CUDA/Runtime
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index d9bc962..c39c567 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -518,13 +518,13 @@ std::vector<GpuContext> getAMDDevices(int index)
printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
continue;
}
- printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
// if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory
ctx.deviceIdx = k;
ctx.freeMem = std::min(ctx.freeMem, maxMem);
ctx.name = std::string(devNameVec.data());
ctx.DeviceID = device_list[k];
+ printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
ctxVec.push_back(ctx);
}
}
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 93b71ba..afedb5c 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -117,6 +117,9 @@ private:
*/
maxThreads = 2024u;
}
+ // double the intensity limit for aeon (i.e. when the currency is not monero)
+ if(!::jconf::inst()->IsCurrencyMonero())
+ maxThreads *= 2u;
// keep 128MiB memory free (value is randomly chosen)
size_t availableMem = ctx.freeMem - (128u * byteToMiB);
diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp
index 07afb19..f126342 100644
--- a/xmrstak/backend/amd/jconf.cpp
+++ b/xmrstak/backend/amd/jconf.cpp
@@ -56,9 +56,10 @@ struct configVal {
Type iType;
};
-//Same order as in configEnum, as per comment above
+// Same order as in configEnum, as per comment above
+// kNullType means any type
configVal oConfigValues[] = {
- { aGpuThreadsConf, "gpu_threads_conf", kArrayType },
+ { aGpuThreadsConf, "gpu_threads_conf", kNullType },
{ iPlatformIdx, "platform_index", kNumberType }
};
@@ -68,6 +69,8 @@ inline bool checkType(Type have, Type want)
{
if(want == have)
return true;
+ else if(want == kNullType)
+ return true;
else if(want == kTrueType && have == kFalseType)
return true;
else if(want == kFalseType && have == kTrueType)
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 15a6f36..cc97274 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m
template< typename T >
__device__ __forceinline__ T loadGlobal64( T * const addr )
{
+#if (__CUDA_ARCH__ < 700)
T x;
asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) );
return x;
+#else
+ return *addr;
+#endif
}
template< typename T >
__device__ __forceinline__ T loadGlobal32( T * const addr )
{
+#if (__CUDA_ARCH__ < 700)
T x;
asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) );
return x;
+#else
+ return *addr;
+#endif
}
template< typename T >
__device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
{
+#if (__CUDA_ARCH__ < 700)
asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
+#else
+ *addr = val;
+#endif
}
template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index d5d0039..92259db 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
break;
};
- CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
+ const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
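+ /* Request the L1-preferring cache configuration (cudaFuncCachePreferL1)
+ * only for GPUs before Volta (compute capability < 7.0); newer GPUs keep
+ * their default cache configuration.
+ * L1 speed is increased and latency reduced with Volta.
+ */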
+ if(gpuArch < 70)
+ CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
size_t hashMemSize;
if(::jconf::inst()->IsCurrencyMonero())
OpenPOWER on IntegriCloud