4 files changed, 16 insertions, 1 deletion
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 015f7c2..9ae7b26 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,6 +79,10 @@ if(CUDA_ENABLE)
         if(NOT CUDA_VERSION VERSION_LESS 8.0)
             list(APPEND DEFAULT_CUDA_ARCH "60" "61" "62")
         endif()
+        # add Volta (compute capability 7.0, i.e. arch "70") support for CUDA >= 9.0
+        if(NOT CUDA_VERSION VERSION_LESS 9.0)
+            list(APPEND DEFAULT_CUDA_ARCH "70")
+        endif()
         set(CUDA_ARCH "${DEFAULT_CUDA_ARCH}" CACHE STRING "Set GPU architecture (semicolon separated list, e.g. '-DCUDA_ARCH=20;35;60')")
 
         # validate architectures (only numbers are allowed)
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index ca81718..f9908cb 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -896,7 +896,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 			// round up to next multiple of w_size
 			BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
 			// number of global threads must be a multiple of the work group size (w_size)
-			assert(BranchNonces%w_size == 0);
+			assert(BranchNonces[i]%w_size == 0);
 			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 			{
 				printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
diff --git a/xmrstak/backend/cpu/hwlocMemory.hpp b/xmrstak/backend/cpu/hwlocMemory.hpp
index 719c1bb..69742be 100644
--- a/xmrstak/backend/cpu/hwlocMemory.hpp
+++ b/xmrstak/backend/cpu/hwlocMemory.hpp
@@ -21,6 +21,13 @@ void bindMemoryToNUMANode( size_t puId )
 	hwloc_topology_init(&topology);
 	hwloc_topology_load(topology);
 
+	if(!hwloc_topology_get_support(topology)->membind->set_thisthread_membind)
+	{
+		printer::inst()->print_msg(L0, "hwloc: set_thisthread_membind not supported");
+		hwloc_topology_destroy(topology);
+		return;
+	}
+
 	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
 
 	for( size_t i = 0;
@@ -45,6 +52,8 @@ void bindMemoryToNUMANode( size_t puId )
 			}
 		}
 	}
+
+	hwloc_topology_destroy(topology);
 }
 #else
 
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index eb395a5..625fbe4 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -410,6 +410,8 @@ void minethd::double_work_main()
 	if(affinity >= 0) //-1 means no affinity
 		bindMemoryToNUMANode(affinity);
 
+	order_fix.set_value();
+
 	cn_hash_fun_dbl hash_fun;
 	cryptonight_ctx* ctx0;
 	cryptonight_ctx* ctx1;