diff options
author | fireice-uk <fireice-uk@users.noreply.github.com> | 2018-02-03 10:30:16 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-03 10:30:16 +0000 |
commit | 8bef63c913cdd6871a1306bc5fc62ecff9308c2e (patch) | |
tree | bd5a019bab33a81065f77bec3ef7d4ff03e751d5 /xmrstak/backend | |
parent | 4d7fdd2dea4306923d3ccfc8f58658e3429a3587 (diff) | |
parent | c528f51a767a8e988dc03be080094dc979107499 (diff) | |
download | xmr-stak-8bef63c913cdd6871a1306bc5fc62ecff9308c2e.zip xmr-stak-8bef63c913cdd6871a1306bc5fc62ecff9308c2e.tar.gz |
Merge pull request #1014 from psychocrypt/topic-speedupVolta
speedup Volta
Diffstat (limited to 'xmrstak/backend')
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 12 | ||||
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 8 |
2 files changed, 19 insertions, 1 deletions
```diff
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 15a6f36..cc97274 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m
 template< typename T >
 __device__ __forceinline__ T loadGlobal64( T * const addr )
 {
+#if (__CUDA_ARCH__ < 700)
 	T x;
 	asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) );
 	return x;
+#else
+	return *addr;
+#endif
 }
 
 template< typename T >
 __device__ __forceinline__ T loadGlobal32( T * const addr )
 {
+#if (__CUDA_ARCH__ < 700)
 	T x;
 	asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) );
 	return x;
+#else
+	return *addr;
+#endif
 }
 
 
 template< typename T >
 __device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
 {
+#if (__CUDA_ARCH__ < 700)
 	asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
+#else
+	*addr = val;
+#endif
 }
 
 template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index d5d0039..92259db 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 		break;
 
 	};
-	CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
+	const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
+	/* Disable L1 cache for GPUs before Volta.
+	 * L1 speed is increased and latency reduced with Volta.
+	 */
+	if(gpuArch < 70)
+		CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 	size_t hashMemSize;
 	if(::jconf::inst()->IsCurrencyMonero())
```