fix CUDA launch bounds usage

fix #191: launch bounds must be placed before the return type but after the template parameters
author: psychocrypt <psychocrypt@users.noreply.github.com> 2017-11-23 21:25:25 +0100
committer: psychocrypt <psychocrypt@users.noreply.github.com> 2017-11-23 21:25:25 +0100
commit: 43812b36d763259216b288c62ea343e131510244 (patch)
tree: 50d5c30b0838e2cbc141da2305af64bc4cfb5850
parent: fb6baa5d46cfa3d5e2b835544ea83e736b6903e4 (diff)
download: xmr-stak-43812b36d763259216b288c62ea343e131510244.zip
xmr-stak-43812b36d763259216b288c62ea343e131510244.tar.gz
1 files changed, 1 insertions, 1 deletions
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index a92fa8c..dba6676 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -167,10 +167,10 @@ __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_
 #endif
 }
 
+template<size_t ITERATIONS, uint32_t THREAD_SHIFT, uint32_t MASK>
 #ifdef XMR_STAK_THREADS
 __launch_bounds__( XMR_STAK_THREADS * 4 )
 #endif
-template<size_t ITERATIONS, uint32_t THREAD_SHIFT, uint32_t MASK>
 __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b )
 {
 	__shared__ uint32_t sharedMemory[1024];
author	psychocrypt <psychocrypt@users.noreply.github.com>	2017-11-23 21:25:25 +0100
committer	psychocrypt <psychocrypt@users.noreply.github.com>	2017-11-23 21:25:25 +0100
commit	43812b36d763259216b288c62ea343e131510244 (patch)
tree	50d5c30b0838e2cbc141da2305af64bc4cfb5850
parent	fb6baa5d46cfa3d5e2b835544ea83e736b6903e4 (diff)
download	xmr-stak-43812b36d763259216b288c62ea343e131510244.zip xmr-stak-43812b36d763259216b288c62ea343e131510244.tar.gz