summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend/nvidia/nvcc_code
diff options
context:
space:
mode:
author psychocrypt <psychocryptHPC@gmail.com> 2018-01-30 20:47:56 +0100
committer psychocrypt <psychocryptHPC@gmail.com> 2018-01-30 20:47:56 +0100
commit c528f51a767a8e988dc03be080094dc979107499 (patch)
tree 7ca34f10ed0ad3e53acf87fa6d6d41051af126d9 /xmrstak/backend/nvidia/nvcc_code
parent a8e83eec94288cf00d5003071d24221b29f30cab (diff)
download xmr-stak-c528f51a767a8e988dc03be080094dc979107499.zip
xmr-stak-c528f51a767a8e988dc03be080094dc979107499.tar.gz
speedup Volta
- enable L1 cache for NVIDIA Volta GPUs and newer
- remove explicit cache control for Volta GPUs and newer

This pull request increases the hash rate for Volta GPUs by ~5%.
Diffstat (limited to 'xmrstak/backend/nvidia/nvcc_code')
-rw-r--r-- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu 12
-rw-r--r-- xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu 8
2 files changed, 19 insertions, 1 deletions
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 15a6f36..cc97274 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m
template< typename T >
__device__ __forceinline__ T loadGlobal64( T * const addr )
{
+#if (__CUDA_ARCH__ < 700)
T x;
asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) );
return x;
+#else
+ return *addr;
+#endif
}
template< typename T >
__device__ __forceinline__ T loadGlobal32( T * const addr )
{
+#if (__CUDA_ARCH__ < 700)
T x;
asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) );
return x;
+#else
+ return *addr;
+#endif
}
template< typename T >
__device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
{
+#if (__CUDA_ARCH__ < 700)
asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
+#else
+ *addr = val;
+#endif
}
template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index d5d0039..92259db 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
break;
};
- CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
+ const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
+ /* Request the PreferL1 cache configuration only for GPUs before Volta.
+ * Volta and newer skip this call: their L1 is faster and has lower latency.
+ */
+ if(gpuArch < 70)
+ CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
size_t hashMemSize;
if(::jconf::inst()->IsCurrencyMonero())
OpenPOWER on IntegriCloud