6 files changed, 94 insertions, 76 deletions
diff --git a/xmrstak/backend/cpu/hwlocMemory.cpp b/xmrstak/backend/cpu/hwlocMemory.cpp
new file mode 100644
index 0000000..94d2b53
--- /dev/null
+++ b/xmrstak/backend/cpu/hwlocMemory.cpp
@@ -0,0 +1,64 @@
+#include "xmrstak/backend/cpu/hwlocMemory.hpp"
+
+#ifndef CONF_NO_HWLOC
+
+#include "xmrstak/misc/console.hpp"
+
+#include <hwloc.h>
+
+/** pin memory to NUMA node
+ *
+ * Set the default memory policy for the current thread to bind memory to the
+ * NUMA node.
+ *
+ * @param puId core id
+ */
+void bindMemoryToNUMANode( size_t puId )
+{
+	int depth;
+	hwloc_topology_t topology;
+
+	hwloc_topology_init(&topology);
+	hwloc_topology_load(topology);
+
+	if(!hwloc_topology_get_support(topology)->membind->set_thisthread_membind)
+	{
+		printer::inst()->print_msg(L0, "hwloc: set_thisthread_membind not supported");
+		hwloc_topology_destroy(topology);
+		return;
+	}
+
+	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
+
+	for( size_t i = 0;
+		i < hwloc_get_nbobjs_by_depth(topology, depth);
+		i++ )
+	{
+		hwloc_obj_t pu = hwloc_get_obj_by_depth(topology, depth, i);
+		if(  pu->os_index == puId )
+		{
+			if( 0 > hwloc_set_membind_nodeset(
+				topology,
+				pu->nodeset,
+				HWLOC_MEMBIND_BIND,
+				HWLOC_MEMBIND_THREAD))
+			{
+				printer::inst()->print_msg(L0, "hwloc: can't bind memory");
+			}
+			else
+			{
+				printer::inst()->print_msg(L0, "hwloc: memory pinned");
+				break;
+			}
+		}
+	}
+
+	hwloc_topology_destroy(topology);
+}
+#else
+
+void bindMemoryToNUMANode( size_t )
+{
+}
+
+#endif
diff --git a/xmrstak/backend/cpu/hwlocMemory.hpp b/xmrstak/backend/cpu/hwlocMemory.hpp
index 69742be..2130c2c 100644
--- a/xmrstak/backend/cpu/hwlocMemory.hpp
+++ b/xmrstak/backend/cpu/hwlocMemory.hpp
@@ -1,10 +1,6 @@
 #pragma once
 
-#include "xmrstak/misc/console.hpp"
-
-#ifndef CONF_NO_HWLOC
-
-#include <hwloc.h>
+#include <cstddef>
 
 /** pin memory to NUMA node
  *
@@ -13,52 +9,4 @@
  *
  * @param puId core id
  */
-void bindMemoryToNUMANode( size_t puId )
-{
-	int depth;
-	hwloc_topology_t topology;
-
-	hwloc_topology_init(&topology);
-	hwloc_topology_load(topology);
-
-	if(!hwloc_topology_get_support(topology)->membind->set_thisthread_membind)
-	{
-		printer::inst()->print_msg(L0, "hwloc: set_thisthread_membind not supported");
-		hwloc_topology_destroy(topology);
-		return;
-	}
-
-	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
-
-	for( size_t i = 0;
-		i < hwloc_get_nbobjs_by_depth(topology, depth);
-		i++ )
-	{
-		hwloc_obj_t pu = hwloc_get_obj_by_depth(topology, depth, i);
-		if(  pu->os_index == puId )
-		{
-			if( 0 > hwloc_set_membind_nodeset(
-				topology,
-				pu->nodeset,
-				HWLOC_MEMBIND_BIND,
-				HWLOC_MEMBIND_THREAD))
-			{
-				printer::inst()->print_msg(L0, "hwloc: can't bind memory");
-			}
-			else
-			{
-				printer::inst()->print_msg(L0, "hwloc: memory pinned");
-				break;
-			}
-		}
-	}
-
-	hwloc_topology_destroy(topology);
-}
-#else
-
-void bindMemoryToNUMANode( size_t )
-{
-}
-
-#endif
+void bindMemoryToNUMANode( size_t puId );
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 0bc6214..0cf9a42 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -222,7 +222,6 @@ void minethd::work_main()
 	}
 
 	bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero");
-	bool useAEON = strcmp_i(::jconf::inst()->GetCurrency(), "aeon");
 	
 	while (bQuit == 0)
 	{
@@ -261,18 +260,9 @@ void minethd::work_main()
 			uint32_t foundCount;
 
 			cryptonight_extra_cpu_prepare(&ctx, iNonce);
-#ifndef CONF_NO_MONERO
-			if(mineMonero)
-			{
-				cryptonight_core_cpu_hash<MONERO_ITER, MONERO_MASK, 19>(&ctx);
-			}
-#endif
-#ifndef CONF_NO_AEON
-			if(useAEON)
-			{
-				cryptonight_core_cpu_hash<MONERO_ITER, MONERO_MASK, 18>(&ctx);
-			}
-#endif
+
+			cryptonight_core_cpu_hash(&ctx, mineMonero);
+
 			cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce);
 
 			for(size_t i = 0; i < foundCount; i++)
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index d1fce40..f6d989c 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -4,7 +4,7 @@
 #include "jconf.hpp"
 #include "nvcc_code/cryptonight.hpp"
 
-#include "xmrstak/backend/cpu/crypto/cryptonight.h"
+#include "xmrstak/backend/cpu/minethd.hpp"
 #include "xmrstak/backend/iBackend.hpp"
 #include "xmrstak/misc/environment.hpp"
 
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index 4e0ace7..0cfdaac 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -44,6 +44,4 @@ void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce);
 void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce);
 }
 
-template<size_t ITERATIONS, size_t THREAD_SHIFT, uint32_t MASK>
-void cryptonight_core_cpu_hash(nvid_ctx* ctx);
-
+void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 5bea230..a92fa8c 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -1,3 +1,5 @@
+#include "xmrstak/backend/cryptonight.hpp"
+
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
@@ -92,7 +94,7 @@ __device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
 	asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
 }
 
-template<size_t ITERATIONS, size_t THREAD_SHIFT>
+template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
 __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state, uint32_t * __restrict__ ctx_key1 )
 {
 	__shared__ uint32_t sharedMemory[1024];
@@ -168,7 +170,7 @@ __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_
 #ifdef XMR_STAK_THREADS
 __launch_bounds__( XMR_STAK_THREADS * 4 )
 #endif
-template<size_t ITERATIONS, size_t THREAD_SHIFT, uint32_t MASK>
+template<size_t ITERATIONS, uint32_t THREAD_SHIFT, uint32_t MASK>
 __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b )
 {
 	__shared__ uint32_t sharedMemory[1024];
@@ -257,7 +259,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 	}
 }
 
-template<size_t ITERATIONS, size_t THREAD_SHIFT>
+template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
 __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 )
 {
 	__shared__ uint32_t sharedMemory[1024];
@@ -292,8 +294,8 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 );
 }
 
-template<size_t ITERATIONS, size_t THREAD_SHIFT, uint32_t MASK>
-void cryptonight_core_cpu_hash(nvid_ctx* ctx)
+template<size_t ITERATIONS, uint32_t MASK, uint32_t THREAD_SHIFT>
+void cryptonight_core_gpu_hash(nvid_ctx* ctx)
 {
 	dim3 grid( ctx->device_blocks );
 	dim3 block( ctx->device_threads );
@@ -349,3 +351,19 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx)
 			ctx->d_ctx_state, ctx->d_ctx_key2 ));
 	}
 }
+
+void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero)
+{
+#ifndef CONF_NO_MONERO
+	if(mineMonero)
+	{
+		cryptonight_core_gpu_hash<MONERO_ITER, MONERO_MASK, 19u>(ctx);
+	}
+#endif
+#ifndef CONF_NO_AEON
+	if(!mineMonero)
+	{
+		cryptonight_core_gpu_hash<AEON_ITER, AEON_MASK, 18u>(ctx);
+	}
+#endif
+}