diff options
Diffstat (limited to 'xmrstak')
-rw-r--r--  xmrstak/backend/amd/amd_gpu/gpu.cpp            | 2 +-
-rw-r--r--  xmrstak/backend/cpu/hwlocMemory.hpp            | 9 +++++++++
-rw-r--r--  xmrstak/backend/cpu/minethd.cpp                | 2 ++
-rw-r--r--  xmrstak/backend/nvidia/nvcc_code/cuda_core.cu  | 6 +++++-
4 files changed, 17 insertions, 2 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index ca81718..f9908cb 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -896,7 +896,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 		// round up to next multiple of w_size
 		BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
 		// number of global threads must be a multiple of the work group size (w_size)
-		assert(BranchNonces%w_size == 0);
+		assert(BranchNonces[i]%w_size == 0);
 		if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 		{
 			printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
diff --git a/xmrstak/backend/cpu/hwlocMemory.hpp b/xmrstak/backend/cpu/hwlocMemory.hpp
index 719c1bb..69742be 100644
--- a/xmrstak/backend/cpu/hwlocMemory.hpp
+++ b/xmrstak/backend/cpu/hwlocMemory.hpp
@@ -21,6 +21,13 @@ void bindMemoryToNUMANode( size_t puId )
 	hwloc_topology_init(&topology);
 	hwloc_topology_load(topology);
 
+	if(!hwloc_topology_get_support(topology)->membind->set_thisthread_membind)
+	{
+		printer::inst()->print_msg(L0, "hwloc: set_thisthread_membind not supported");
+		hwloc_topology_destroy(topology);
+		return;
+	}
+
 	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
 
 	for( size_t i = 0;
@@ -45,6 +52,8 @@ void bindMemoryToNUMANode( size_t puId )
 			}
 		}
 	}
+
+	hwloc_topology_destroy(topology);
 }
 
 #else
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index eb395a5..625fbe4 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -410,6 +410,8 @@ void minethd::double_work_main()
 	if(affinity >= 0) //-1 means no affinity
 		bindMemoryToNUMANode(affinity);
 
+	order_fix.set_value();
+
 	cn_hash_fun_dbl hash_fun;
 	cryptonight_ctx* ctx0;
 	cryptonight_ctx* ctx1;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 0c086e8..e1b78ce 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -156,7 +156,11 @@ __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_
 #else
 	unusedVar( ptr );
 	unusedVar( sub );
-	return __shfl( val, src, 4 );
+#	if(__CUDACC_VER_MAJOR__ >= 9)
+	return __shfl_sync(0xFFFFFFFF, val, src, 4 );
+#	else
+	return __shfl( val, src, 4 );
+#	endif
 #endif
 }
 