diff options
author | psychocrypt <psychocryptHPC@gmail.com> | 2018-01-30 20:47:56 +0100 |
---|---|---|
committer | psychocrypt <psychocryptHPC@gmail.com> | 2018-01-30 20:47:56 +0100 |
commit | c528f51a767a8e988dc03be080094dc979107499 (patch) | |
tree | 7ca34f10ed0ad3e53acf87fa6d6d41051af126d9 /xmrstak/backend/nvidia/nvcc_code | |
parent | a8e83eec94288cf00d5003071d24221b29f30cab (diff) | |
download | xmr-stak-c528f51a767a8e988dc03be080094dc979107499.zip xmr-stak-c528f51a767a8e988dc03be080094dc979107499.tar.gz |
speedup Volta
- enable L1 cache for Nvidia Volta GPUs and newer
- remove explicit cache controll for Volta GPU and newer
This pull request increases the hash rate for Volta GPUs by ~5%
Diffstat (limited to 'xmrstak/backend/nvidia/nvcc_code')
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 12 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 8 |
2 files changed, 19 insertions, 1 deletions
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 15a6f36..cc97274 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m template< typename T > __device__ __forceinline__ T loadGlobal64( T * const addr ) { +#if (__CUDA_ARCH__ < 700) T x; asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) ); return x; +#else + return *addr; +#endif } template< typename T > __device__ __forceinline__ T loadGlobal32( T * const addr ) { +#if (__CUDA_ARCH__ < 700) T x; asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) ); return x; +#else + return *addr; +#endif } template< typename T > __device__ __forceinline__ void storeGlobal32( T* addr, T const & val ) { +#if (__CUDA_ARCH__ < 700) asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) ); +#else + *addr = val; +#endif } template<size_t ITERATIONS, uint32_t THREAD_SHIFT> diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index d5d0039..92259db 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) break; }; - CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); + const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1]; + + /* Disable L1 cache for GPUs before Volta. + * L1 speed is increased and latency reduced with Volta. + */ + if(gpuArch < 70) + CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); size_t hashMemSize; if(::jconf::inst()->IsCurrencyMonero()) |