42 files changed, 701 insertions, 567 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 03100d0..c664d53 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -26,7 +26,7 @@
 #include <algorithm>
 #include <regex>
 #include <cassert>
-#include <algorithm> 
+#include <algorithm>
 
 #include <fstream>
 #include <sstream>
@@ -397,7 +397,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		 * used data:
 		 *   - source code
 		 *   - device name
-		 *   - compile paramater
+		 *   - compile parameter
 		 */
 		std::string src_str(source_code);
 		src_str += options;
@@ -476,14 +476,14 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 			}
 			while(status == CL_BUILD_IN_PROGRESS);
 
-			std::vector<size_t> binary_sizes(num_devices);
-			clGetProgramInfo (ctx->Program[ii], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
-
-			std::vector<char*> all_programs(num_devices);
-			std::vector<std::vector<char>> program_storage;
-
 			if(xmrstak::params::inst().AMDCache)
 			{
+				std::vector<size_t> binary_sizes(num_devices);
+				clGetProgramInfo (ctx->Program[ii], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
+
+				std::vector<char*> all_programs(num_devices);
+				std::vector<std::vector<char>> program_storage;
+
 				int p_id = 0;
 				size_t mem_size = 0;
 				// create memory  structure to query all OpenCL program binaries
@@ -935,7 +935,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
 
 size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo)
 {
-	// switch to the kernel storage 
+	// switch to the kernel storage
 	int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;
 
 	cl_int ret;
@@ -1004,12 +1004,12 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
 		return(ERR_OCL_API);
 	}
 
-	if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon )
+	if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite)
 	{
 		// Input
 		if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
 		{
-			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, arugment 4(input buffer).", err_to_str(ret));
+			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 4(input buffer).", err_to_str(ret));
 			return ERR_OCL_API;
 		}
 	}
@@ -1102,7 +1102,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
 {
 	// switch to the kernel storage
 	int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;
-	
+
 	cl_int ret;
 	cl_uint zero = 0;
 	size_t BranchNonces[4];
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index d2ae1a7..c925c87 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -89,7 +89,7 @@ XMRSTAK_INCLUDE_BLAKE256
 //#include "opencl/groestl256.cl"
 XMRSTAK_INCLUDE_GROESTL256
 
-static const __constant ulong keccakf_rndc[24] = 
+static const __constant ulong keccakf_rndc[24] =
 {
     0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
     0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
@@ -101,7 +101,7 @@ static const __constant ulong keccakf_rndc[24] =
     0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 };
 
-static const __constant uchar sbox[256] = 
+static const __constant uchar sbox[256] =
 {
 	0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
 	0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
@@ -124,7 +124,7 @@ static const __constant uchar sbox[256] =
 
 void keccakf1600(ulong *s)
 {
-    for(int i = 0; i < 24; ++i) 
+    for(int i = 0; i < 24; ++i)
     {
 		ulong bc[5], tmp1, tmp2;
         bc[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20] ^ rotate(s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22], 1UL);
@@ -132,9 +132,9 @@ void keccakf1600(ulong *s)
         bc[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22] ^ rotate(s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24], 1UL);
         bc[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23] ^ rotate(s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20], 1UL);
         bc[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24] ^ rotate(s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21], 1UL);
-        
+
         tmp1 = s[1] ^ bc[0];
-        
+
         s[0] ^= bc[4];
         s[1] = rotate(s[6] ^ bc[0], 44UL);
         s[6] = rotate(s[9] ^ bc[3], 20UL);
@@ -160,7 +160,7 @@ void keccakf1600(ulong *s)
         s[11] = rotate(s[7] ^ bc[1], 6UL);
         s[7] = rotate(s[10] ^ bc[4], 3UL);
         s[10] = rotate(tmp1, 1UL);
-        
+
         tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
         tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
         tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
@@ -170,23 +170,23 @@ void keccakf1600(ulong *s)
     }
 }
 
-static const __constant uint keccakf_rotc[24] = 
+static const __constant uint keccakf_rotc[24] =
 {
-    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14, 
+    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
     27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
 };
 
-static const __constant uint keccakf_piln[24] = 
+static const __constant uint keccakf_piln[24] =
 {
-    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4, 
-    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1 
+    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
+    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
 };
 
 void keccakf1600_1(ulong *st)
 {
     int i, round;
     ulong t, bc[5];
-	
+
 	#pragma unroll 1
     for(round = 0; round < 24; ++round)
     {
@@ -197,7 +197,7 @@ void keccakf1600_1(ulong *st)
         bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
         bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
         bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-		
+
 		#pragma unroll 1
         for (i = 0; i < 5; ++i) {
             t = bc[(i + 4) % 5] ^ rotate(bc[(i + 1) % 5], 1UL);
@@ -222,20 +222,20 @@ void keccakf1600_1(ulong *st)
         //tmp1 = st[10]; tmp2 = st[11]; st[10] = bitselect(st[10] ^ st[12], st[10], st[11]); st[11] = bitselect(st[11] ^ st[13], st[11], st[12]); st[12] = bitselect(st[12] ^ st[14], st[12], st[13]); st[13] = bitselect(st[13] ^ tmp1, st[13], st[14]); st[14] = bitselect(st[14] ^ tmp2, st[14], tmp1);
         //tmp1 = st[15]; tmp2 = st[16]; st[15] = bitselect(st[15] ^ st[17], st[15], st[16]); st[16] = bitselect(st[16] ^ st[18], st[16], st[17]); st[17] = bitselect(st[17] ^ st[19], st[17], st[18]); st[18] = bitselect(st[18] ^ tmp1, st[18], st[19]); st[19] = bitselect(st[19] ^ tmp2, st[19], tmp1);
         //tmp1 = st[20]; tmp2 = st[21]; st[20] = bitselect(st[20] ^ st[22], st[20], st[21]); st[21] = bitselect(st[21] ^ st[23], st[21], st[22]); st[22] = bitselect(st[22] ^ st[24], st[22], st[23]); st[23] = bitselect(st[23] ^ tmp1, st[23], st[24]); st[24] = bitselect(st[24] ^ tmp2, st[24], tmp1);
-        
+
         #pragma unroll 1
         for(int i = 0; i < 25; i += 5)
-        {	
+        {
 			ulong tmp[5];
-			
+
 			#pragma unroll 1
 			for(int x = 0; x < 5; ++x)
 				tmp[x] = bitselect(st[i + x] ^ st[i + ((x + 2) % 5)], st[i + x], st[i + ((x + 1) % 5)]);
-			
+
 			#pragma unroll 1
 			for(int x = 0; x < 5; ++x) st[i + x] = tmp[x];
         }
-        
+
         //  Iota
         st[0] ^= keccakf_rndc[round];
     }
@@ -246,7 +246,7 @@ void keccakf1600_2(ulong *st)
 {
     int i, round;
     ulong t, bc[5];
-	
+
 	#pragma unroll 1
     for(round = 0; round < 24; ++round)
     {
@@ -257,7 +257,7 @@ void keccakf1600_2(ulong *st)
         //bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
         //bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
         //bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-		
+
 		/*
 		#pragma unroll
         for (i = 0; i < 5; ++i) {
@@ -269,43 +269,43 @@ void keccakf1600_2(ulong *st)
             st[i + 20] ^= t;
         }
 		*/
-		
+
 		bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20] ^ rotate(st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22], 1UL);
 		bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21] ^ rotate(st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23], 1UL);
 		bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22] ^ rotate(st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24], 1UL);
 		bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23] ^ rotate(st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20], 1UL);
 		bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24] ^ rotate(st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21], 1UL);
-		
+
 		st[0] ^= bc[4];
 		st[5] ^= bc[4];
 		st[10] ^= bc[4];
 		st[15] ^= bc[4];
 		st[20] ^= bc[4];
-		
+
 		st[1] ^= bc[0];
 		st[6] ^= bc[0];
 		st[11] ^= bc[0];
 		st[16] ^= bc[0];
 		st[21] ^= bc[0];
-		
+
 		st[2] ^= bc[1];
 		st[7] ^= bc[1];
 		st[12] ^= bc[1];
 		st[17] ^= bc[1];
 		st[22] ^= bc[1];
-		
+
 		st[3] ^= bc[2];
 		st[8] ^= bc[2];
 		st[13] ^= bc[2];
 		st[18] ^= bc[2];
 		st[23] ^= bc[2];
-		
+
 		st[4] ^= bc[3];
 		st[9] ^= bc[3];
 		st[14] ^= bc[3];
 		st[19] ^= bc[3];
 		st[24] ^= bc[3];
-		
+
         // Rho Pi
         t = st[1];
         #pragma unroll
@@ -314,11 +314,11 @@ void keccakf1600_2(ulong *st)
             st[keccakf_piln[i]] = rotate(t, (ulong)keccakf_rotc[i]);
             t = bc[0];
         }
-		
-		
-		
+
+
+
 		/*ulong tmp1 = st[1] ^ bc[0];
-        
+
         st[0] ^= bc[4];
         st[1] = rotate(st[6] ^ bc[0], 44UL);
         st[6] = rotate(st[9] ^ bc[3], 20UL);
@@ -345,26 +345,26 @@ void keccakf1600_2(ulong *st)
         st[7] = rotate(st[10] ^ bc[4], 3UL);
         st[10] = rotate(tmp1, 1UL);
 		*/
-		
-		
+
+
         //ulong tmp1 = st[0]; ulong tmp2 = st[1]; st[0] = bitselect(st[0] ^ st[2], st[0], st[1]); st[1] = bitselect(st[1] ^ st[3], st[1], st[2]); st[2] = bitselect(st[2] ^ st[4], st[2], st[3]); st[3] = bitselect(st[3] ^ tmp1, st[3], st[4]); st[4] = bitselect(st[4] ^ tmp2, st[4], tmp1);
         //tmp1 = st[5]; tmp2 = st[6]; st[5] = bitselect(st[5] ^ st[7], st[5], st[6]); st[6] = bitselect(st[6] ^ st[8], st[6], st[7]); st[7] = bitselect(st[7] ^ st[9], st[7], st[8]); st[8] = bitselect(st[8] ^ tmp1, st[8], st[9]); st[9] = bitselect(st[9] ^ tmp2, st[9], tmp1);
         //tmp1 = st[10]; tmp2 = st[11]; st[10] = bitselect(st[10] ^ st[12], st[10], st[11]); st[11] = bitselect(st[11] ^ st[13], st[11], st[12]); st[12] = bitselect(st[12] ^ st[14], st[12], st[13]); st[13] = bitselect(st[13] ^ tmp1, st[13], st[14]); st[14] = bitselect(st[14] ^ tmp2, st[14], tmp1);
         //tmp1 = st[15]; tmp2 = st[16]; st[15] = bitselect(st[15] ^ st[17], st[15], st[16]); st[16] = bitselect(st[16] ^ st[18], st[16], st[17]); st[17] = bitselect(st[17] ^ st[19], st[17], st[18]); st[18] = bitselect(st[18] ^ tmp1, st[18], st[19]); st[19] = bitselect(st[19] ^ tmp2, st[19], tmp1);
         //tmp1 = st[20]; tmp2 = st[21]; st[20] = bitselect(st[20] ^ st[22], st[20], st[21]); st[21] = bitselect(st[21] ^ st[23], st[21], st[22]); st[22] = bitselect(st[22] ^ st[24], st[22], st[23]); st[23] = bitselect(st[23] ^ tmp1, st[23], st[24]); st[24] = bitselect(st[24] ^ tmp2, st[24], tmp1);
-        
+
         #pragma unroll
         for(int i = 0; i < 25; i += 5)
         {
 			ulong tmp1 = st[i], tmp2 = st[i + 1];
-			
+
 			st[i] = bitselect(st[i] ^ st[i + 2], st[i], st[i + 1]);
 			st[i + 1] = bitselect(st[i + 1] ^ st[i + 3], st[i + 1], st[i + 2]);
 			st[i + 2] = bitselect(st[i + 2] ^ st[i + 4], st[i + 2], st[i + 3]);
 			st[i + 3] = bitselect(st[i + 3] ^ tmp1, st[i + 3], st[i + 4]);
 			st[i + 4] = bitselect(st[i + 4] ^ tmp2, st[i + 4], tmp1);
         }
-        
+
         //  Iota
         st[0] ^= keccakf_rndc[round];
     }
@@ -376,22 +376,22 @@ R"===(
 void CNKeccak(ulong *output, ulong *input)
 {
 	ulong st[25];
-	
+
 	// Copy 72 bytes
 	for(int i = 0; i < 9; ++i) st[i] = input[i];
-	
+
 	// Last four and '1' bit for padding
 	//st[9] = as_ulong((uint2)(((uint *)input)[18], 0x00000001U));
-	
+
 	st[9] = (input[9] & 0x00000000FFFFFFFFUL) | 0x0000000100000000UL;
-	
+
 	for(int i = 10; i < 25; ++i) st[i] = 0x00UL;
-	
+
 	// Last bit of padding
 	st[16] = 0x8000000000000000UL;
-	
+
 	keccakf1600_1(st);
-	
+
 	for(int i = 0; i < 25; ++i) output[i] = st[i];
 }
 
@@ -408,7 +408,7 @@ void AESExpandKey256(uint *keybuf)
 	{
 		// For 256-bit keys, an sbox permutation is done every other 4th uint generated, AND every 8th
 		uint t = ((!(c & 7)) || ((c & 7) == 4)) ? SubWord(keybuf[c - 1]) : keybuf[c - 1];
-		
+
 		// If the uint we're generating has an index that is a multiple of 8, rotate and XOR with the round constant,
 		// then XOR this with previously generated uint. If it's 4 after a multiple of 8, only the sbox permutation
 		// is done, followed by the XOR. If neither are true, only the XOR with the previously generated uint is done.
@@ -434,7 +434,7 @@ inline ulong getIdx()
 }
 
 #define mix_and_propagate(xin) (xin)[(get_local_id(1)) % 8][get_local_id(0)] ^ (xin)[(get_local_id(1) + 1) % 8][get_local_id(0)]
-		
+
 #define JOIN_DO(x,y) x##y
 #define JOIN(x,y) JOIN_DO(x,y)
 
@@ -460,7 +460,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
 	}
 
 	barrier(CLK_LOCAL_MEM_FENCE);
-		
+
 #if(COMP_MODE==1)
 	// do not use early return here
 	if(gIdx < Threads)
@@ -512,12 +512,12 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
 	}
 
 	mem_fence(CLK_LOCAL_MEM_FENCE);
-		
+
 // cryptonight_heavy
 #if (ALGO == 4)
 	__local uint4 xin[8][WORKSIZE];
 
-	/* Also left over threads performe this loop.
+	/* Left over threads also perform this loop.
 	 * The left over thread results will be ignored
 	 */
 	for(size_t i=0; i < 16; i++)
@@ -553,8 +553,8 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states, ulong Threads
-// cryptonight_monero || cryptonight_aeon
-#if(ALGO == 3 || ALGO == 5)
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7)
 , __global ulong *input
 #endif
 )
@@ -574,7 +574,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 	}
 
 	barrier(CLK_LOCAL_MEM_FENCE);
-#if(ALGO == 3 || ALGO == 5)
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7)
     uint2 tweak1_2;
 #endif
 	uint4 b_x;
@@ -598,7 +599,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 		b[1] = states[3] ^ states[7];
 
 		b_x = ((uint4 *)b)[0];
-#if(ALGO == 3 || ALGO == 5)
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7)
 		tweak1_2 = as_uint2(input[4]);
 		tweak1_2.s0 >>= 24;
 		tweak1_2.s0 |= tweak1_2.s1 << 8;
@@ -625,9 +627,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 			((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
 
 			b_x ^= ((uint4 *)c)[0];
-#if(ALGO == 3 || ALGO == 5)
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7)
 			uint table = 0x75310U;
+// cryptonight_stellite
+#	if(ALGO == 7)
+			uint index = ((b_x.s2 >> 27) & 12) | ((b_x.s2 >> 23) & 2);
+#	else
 			uint index = ((b_x.s2 >> 26) & 12) | ((b_x.s2 >> 23) & 2);
+#	endif
 			b_x.s2 ^= ((table >> index) & 0x30U) << 24;
 #endif
 			Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x;
@@ -638,11 +646,20 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 			a[1] += c[0] * as_ulong2(tmp).s0;
 			a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
 
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7)
 
-#if(ALGO == 3 || ALGO == 5)
+#	if(ALGO == 6)
+			uint2 ipbc_tmp = tweak1_2 ^ ((uint2 *)&(a[0]))[0];
+			((uint2 *)&(a[1]))[0] ^= ipbc_tmp;
+			Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
+			((uint2 *)&(a[1]))[0] ^= ipbc_tmp;
+#	else
 			((uint2 *)&(a[1]))[0] ^= tweak1_2;
 			Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
 			((uint2 *)&(a[1]))[0] ^= tweak1_2;
+#	endif
+
 #else
 			Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
 #endif
@@ -671,7 +688,7 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 	uint ExpandedKey2[40];
 	ulong State[25];
 	uint4 text;
-	
+
 	const ulong gIdx = getIdx();
 
 	for(int i = get_local_id(1) * WORKSIZE + get_local_id(0);
@@ -759,7 +776,7 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 			barrier(CLK_LOCAL_MEM_FENCE);
 			text = mix_and_propagate(xin);
 		}
-		
+
 #else
 		#pragma unroll 2
 		for(int i = 0; i < (MEMORY >> 7); ++i)
@@ -835,7 +852,7 @@ R"===(
 __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
 {
 	const ulong idx = get_global_id(0) - get_global_offset(0);
-	
+
 	// do not use early return here
 	if(idx < Threads)
 	{
@@ -887,7 +904,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
 				output[outIdx] = BranchBuf[idx] + get_global_offset(0);
 		}
 	}
-	mem_fence(CLK_GLOBAL_MEM_FENCE);	
+	mem_fence(CLK_GLOBAL_MEM_FENCE);
 }
 
 #define SWAP8(x)	as_ulong(as_uchar8(x).s76543210)
@@ -916,7 +933,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
 __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
 {
 	const uint idx = get_global_id(0) - get_global_offset(0);
-	
+
 	// do not use early return here
 	if(idx < Threads)
 	{
@@ -970,12 +987,12 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
 __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
 {
 	const uint idx = get_global_id(0) - get_global_offset(0);
-	
+
 	// do not use early return here
 	if(idx < Threads)
 	{
 		states += 25 * BranchBuf[idx];
-	
+
 		unsigned int m[16];
 		unsigned int v[16];
 		uint h[8];
@@ -1046,7 +1063,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
 __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, ulong Threads)
 {
 	const uint idx = get_global_id(0) - get_global_offset(0);
-	
+
 	// do not use early return here
 	if(idx < Threads)
 	{
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl b/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl
index 1a7c96f..2260385 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl
@@ -58,69 +58,69 @@ R"===(
 
 static const __constant ulong T0_G[] =
 {
-	0xc6a597f4a5f432c6UL, 0xf884eb9784976ff8UL, 0xee99c7b099b05eeeUL, 0xf68df78c8d8c7af6UL, 
-	0xff0de5170d17e8ffUL, 0xd6bdb7dcbddc0ad6UL, 0xdeb1a7c8b1c816deUL, 0x915439fc54fc6d91UL, 
-	0x6050c0f050f09060UL, 0x0203040503050702UL, 0xcea987e0a9e02eceUL, 0x567dac877d87d156UL, 
-	0xe719d52b192bcce7UL, 0xb56271a662a613b5UL, 0x4de69a31e6317c4dUL, 0xec9ac3b59ab559ecUL, 
-	0x8f4505cf45cf408fUL, 0x1f9d3ebc9dbca31fUL, 0x894009c040c04989UL, 0xfa87ef92879268faUL, 
-	0xef15c53f153fd0efUL, 0xb2eb7f26eb2694b2UL, 0x8ec90740c940ce8eUL, 0xfb0bed1d0b1de6fbUL, 
-	0x41ec822fec2f6e41UL, 0xb3677da967a91ab3UL, 0x5ffdbe1cfd1c435fUL, 0x45ea8a25ea256045UL, 
-	0x23bf46dabfdaf923UL, 0x53f7a602f7025153UL, 0xe496d3a196a145e4UL, 0x9b5b2ded5bed769bUL, 
-	0x75c2ea5dc25d2875UL, 0xe11cd9241c24c5e1UL, 0x3dae7ae9aee9d43dUL, 0x4c6a98be6abef24cUL, 
-	0x6c5ad8ee5aee826cUL, 0x7e41fcc341c3bd7eUL, 0xf502f1060206f3f5UL, 0x834f1dd14fd15283UL, 
-	0x685cd0e45ce48c68UL, 0x51f4a207f4075651UL, 0xd134b95c345c8dd1UL, 0xf908e9180818e1f9UL, 
-	0xe293dfae93ae4ce2UL, 0xab734d9573953eabUL, 0x6253c4f553f59762UL, 0x2a3f54413f416b2aUL, 
-	0x080c10140c141c08UL, 0x955231f652f66395UL, 0x46658caf65afe946UL, 0x9d5e21e25ee27f9dUL, 
-	0x3028607828784830UL, 0x37a16ef8a1f8cf37UL, 0x0a0f14110f111b0aUL, 0x2fb55ec4b5c4eb2fUL, 
-	0x0e091c1b091b150eUL, 0x2436485a365a7e24UL, 0x1b9b36b69bb6ad1bUL, 0xdf3da5473d4798dfUL, 
-	0xcd26816a266aa7cdUL, 0x4e699cbb69bbf54eUL, 0x7fcdfe4ccd4c337fUL, 0xea9fcfba9fba50eaUL, 
-	0x121b242d1b2d3f12UL, 0x1d9e3ab99eb9a41dUL, 0x5874b09c749cc458UL, 0x342e68722e724634UL, 
-	0x362d6c772d774136UL, 0xdcb2a3cdb2cd11dcUL, 0xb4ee7329ee299db4UL, 0x5bfbb616fb164d5bUL, 
-	0xa4f65301f601a5a4UL, 0x764decd74dd7a176UL, 0xb76175a361a314b7UL, 0x7dcefa49ce49347dUL, 
-	0x527ba48d7b8ddf52UL, 0xdd3ea1423e429fddUL, 0x5e71bc937193cd5eUL, 0x139726a297a2b113UL, 
-	0xa6f55704f504a2a6UL, 0xb96869b868b801b9UL, 0x0000000000000000UL, 0xc12c99742c74b5c1UL, 
-	0x406080a060a0e040UL, 0xe31fdd211f21c2e3UL, 0x79c8f243c8433a79UL, 0xb6ed772ced2c9ab6UL, 
-	0xd4beb3d9bed90dd4UL, 0x8d4601ca46ca478dUL, 0x67d9ce70d9701767UL, 0x724be4dd4bddaf72UL, 
-	0x94de3379de79ed94UL, 0x98d42b67d467ff98UL, 0xb0e87b23e82393b0UL, 0x854a11de4ade5b85UL, 
-	0xbb6b6dbd6bbd06bbUL, 0xc52a917e2a7ebbc5UL, 0x4fe59e34e5347b4fUL, 0xed16c13a163ad7edUL, 
-	0x86c51754c554d286UL, 0x9ad72f62d762f89aUL, 0x6655ccff55ff9966UL, 0x119422a794a7b611UL, 
-	0x8acf0f4acf4ac08aUL, 0xe910c9301030d9e9UL, 0x0406080a060a0e04UL, 0xfe81e798819866feUL, 
-	0xa0f05b0bf00baba0UL, 0x7844f0cc44ccb478UL, 0x25ba4ad5bad5f025UL, 0x4be3963ee33e754bUL, 
-	0xa2f35f0ef30eaca2UL, 0x5dfeba19fe19445dUL, 0x80c01b5bc05bdb80UL, 0x058a0a858a858005UL, 
-	0x3fad7eecadecd33fUL, 0x21bc42dfbcdffe21UL, 0x7048e0d848d8a870UL, 0xf104f90c040cfdf1UL, 
-	0x63dfc67adf7a1963UL, 0x77c1ee58c1582f77UL, 0xaf75459f759f30afUL, 0x426384a563a5e742UL, 
-	0x2030405030507020UL, 0xe51ad12e1a2ecbe5UL, 0xfd0ee1120e12effdUL, 0xbf6d65b76db708bfUL, 
-	0x814c19d44cd45581UL, 0x1814303c143c2418UL, 0x26354c5f355f7926UL, 0xc32f9d712f71b2c3UL, 
-	0xbee16738e13886beUL, 0x35a26afda2fdc835UL, 0x88cc0b4fcc4fc788UL, 0x2e395c4b394b652eUL, 
-	0x93573df957f96a93UL, 0x55f2aa0df20d5855UL, 0xfc82e39d829d61fcUL, 0x7a47f4c947c9b37aUL, 
-	0xc8ac8befacef27c8UL, 0xbae76f32e73288baUL, 0x322b647d2b7d4f32UL, 0xe695d7a495a442e6UL, 
-	0xc0a09bfba0fb3bc0UL, 0x199832b398b3aa19UL, 0x9ed12768d168f69eUL, 0xa37f5d817f8122a3UL, 
-	0x446688aa66aaee44UL, 0x547ea8827e82d654UL, 0x3bab76e6abe6dd3bUL, 0x0b83169e839e950bUL, 
-	0x8cca0345ca45c98cUL, 0xc729957b297bbcc7UL, 0x6bd3d66ed36e056bUL, 0x283c50443c446c28UL, 
-	0xa779558b798b2ca7UL, 0xbce2633de23d81bcUL, 0x161d2c271d273116UL, 0xad76419a769a37adUL, 
-	0xdb3bad4d3b4d96dbUL, 0x6456c8fa56fa9e64UL, 0x744ee8d24ed2a674UL, 0x141e28221e223614UL, 
-	0x92db3f76db76e492UL, 0x0c0a181e0a1e120cUL, 0x486c90b46cb4fc48UL, 0xb8e46b37e4378fb8UL, 
-	0x9f5d25e75de7789fUL, 0xbd6e61b26eb20fbdUL, 0x43ef862aef2a6943UL, 0xc4a693f1a6f135c4UL, 
-	0x39a872e3a8e3da39UL, 0x31a462f7a4f7c631UL, 0xd337bd5937598ad3UL, 0xf28bff868b8674f2UL, 
-	0xd532b156325683d5UL, 0x8b430dc543c54e8bUL, 0x6e59dceb59eb856eUL, 0xdab7afc2b7c218daUL, 
-	0x018c028f8c8f8e01UL, 0xb16479ac64ac1db1UL, 0x9cd2236dd26df19cUL, 0x49e0923be03b7249UL, 
-	0xd8b4abc7b4c71fd8UL, 0xacfa4315fa15b9acUL, 0xf307fd090709faf3UL, 0xcf25856f256fa0cfUL, 
-	0xcaaf8feaafea20caUL, 0xf48ef3898e897df4UL, 0x47e98e20e9206747UL, 0x1018202818283810UL, 
-	0x6fd5de64d5640b6fUL, 0xf088fb83888373f0UL, 0x4a6f94b16fb1fb4aUL, 0x5c72b8967296ca5cUL, 
-	0x3824706c246c5438UL, 0x57f1ae08f1085f57UL, 0x73c7e652c7522173UL, 0x975135f351f36497UL, 
-	0xcb238d652365aecbUL, 0xa17c59847c8425a1UL, 0xe89ccbbf9cbf57e8UL, 0x3e217c6321635d3eUL, 
-	0x96dd377cdd7cea96UL, 0x61dcc27fdc7f1e61UL, 0x0d861a9186919c0dUL, 0x0f851e9485949b0fUL, 
-	0xe090dbab90ab4be0UL, 0x7c42f8c642c6ba7cUL, 0x71c4e257c4572671UL, 0xccaa83e5aae529ccUL, 
-	0x90d83b73d873e390UL, 0x06050c0f050f0906UL, 0xf701f5030103f4f7UL, 0x1c12383612362a1cUL, 
-	0xc2a39ffea3fe3cc2UL, 0x6a5fd4e15fe18b6aUL, 0xaef94710f910beaeUL, 0x69d0d26bd06b0269UL, 
-	0x17912ea891a8bf17UL, 0x995829e858e87199UL, 0x3a2774692769533aUL, 0x27b94ed0b9d0f727UL, 
-	0xd938a948384891d9UL, 0xeb13cd351335deebUL, 0x2bb356ceb3cee52bUL, 0x2233445533557722UL, 
-	0xd2bbbfd6bbd604d2UL, 0xa9704990709039a9UL, 0x07890e8089808707UL, 0x33a766f2a7f2c133UL, 
-	0x2db65ac1b6c1ec2dUL, 0x3c22786622665a3cUL, 0x15922aad92adb815UL, 0xc92089602060a9c9UL, 
-	0x874915db49db5c87UL, 0xaaff4f1aff1ab0aaUL, 0x5078a0887888d850UL, 0xa57a518e7a8e2ba5UL, 
-	0x038f068a8f8a8903UL, 0x59f8b213f8134a59UL, 0x0980129b809b9209UL, 0x1a1734391739231aUL, 
-	0x65daca75da751065UL, 0xd731b553315384d7UL, 0x84c61351c651d584UL, 0xd0b8bbd3b8d303d0UL, 
-	0x82c31f5ec35edc82UL, 0x29b052cbb0cbe229UL, 0x5a77b4997799c35aUL, 0x1e113c3311332d1eUL, 
+	0xc6a597f4a5f432c6UL, 0xf884eb9784976ff8UL, 0xee99c7b099b05eeeUL, 0xf68df78c8d8c7af6UL,
+	0xff0de5170d17e8ffUL, 0xd6bdb7dcbddc0ad6UL, 0xdeb1a7c8b1c816deUL, 0x915439fc54fc6d91UL,
+	0x6050c0f050f09060UL, 0x0203040503050702UL, 0xcea987e0a9e02eceUL, 0x567dac877d87d156UL,
+	0xe719d52b192bcce7UL, 0xb56271a662a613b5UL, 0x4de69a31e6317c4dUL, 0xec9ac3b59ab559ecUL,
+	0x8f4505cf45cf408fUL, 0x1f9d3ebc9dbca31fUL, 0x894009c040c04989UL, 0xfa87ef92879268faUL,
+	0xef15c53f153fd0efUL, 0xb2eb7f26eb2694b2UL, 0x8ec90740c940ce8eUL, 0xfb0bed1d0b1de6fbUL,
+	0x41ec822fec2f6e41UL, 0xb3677da967a91ab3UL, 0x5ffdbe1cfd1c435fUL, 0x45ea8a25ea256045UL,
+	0x23bf46dabfdaf923UL, 0x53f7a602f7025153UL, 0xe496d3a196a145e4UL, 0x9b5b2ded5bed769bUL,
+	0x75c2ea5dc25d2875UL, 0xe11cd9241c24c5e1UL, 0x3dae7ae9aee9d43dUL, 0x4c6a98be6abef24cUL,
+	0x6c5ad8ee5aee826cUL, 0x7e41fcc341c3bd7eUL, 0xf502f1060206f3f5UL, 0x834f1dd14fd15283UL,
+	0x685cd0e45ce48c68UL, 0x51f4a207f4075651UL, 0xd134b95c345c8dd1UL, 0xf908e9180818e1f9UL,
+	0xe293dfae93ae4ce2UL, 0xab734d9573953eabUL, 0x6253c4f553f59762UL, 0x2a3f54413f416b2aUL,
+	0x080c10140c141c08UL, 0x955231f652f66395UL, 0x46658caf65afe946UL, 0x9d5e21e25ee27f9dUL,
+	0x3028607828784830UL, 0x37a16ef8a1f8cf37UL, 0x0a0f14110f111b0aUL, 0x2fb55ec4b5c4eb2fUL,
+	0x0e091c1b091b150eUL, 0x2436485a365a7e24UL, 0x1b9b36b69bb6ad1bUL, 0xdf3da5473d4798dfUL,
+	0xcd26816a266aa7cdUL, 0x4e699cbb69bbf54eUL, 0x7fcdfe4ccd4c337fUL, 0xea9fcfba9fba50eaUL,
+	0x121b242d1b2d3f12UL, 0x1d9e3ab99eb9a41dUL, 0x5874b09c749cc458UL, 0x342e68722e724634UL,
+	0x362d6c772d774136UL, 0xdcb2a3cdb2cd11dcUL, 0xb4ee7329ee299db4UL, 0x5bfbb616fb164d5bUL,
+	0xa4f65301f601a5a4UL, 0x764decd74dd7a176UL, 0xb76175a361a314b7UL, 0x7dcefa49ce49347dUL,
+	0x527ba48d7b8ddf52UL, 0xdd3ea1423e429fddUL, 0x5e71bc937193cd5eUL, 0x139726a297a2b113UL,
+	0xa6f55704f504a2a6UL, 0xb96869b868b801b9UL, 0x0000000000000000UL, 0xc12c99742c74b5c1UL,
+	0x406080a060a0e040UL, 0xe31fdd211f21c2e3UL, 0x79c8f243c8433a79UL, 0xb6ed772ced2c9ab6UL,
+	0xd4beb3d9bed90dd4UL, 0x8d4601ca46ca478dUL, 0x67d9ce70d9701767UL, 0x724be4dd4bddaf72UL,
+	0x94de3379de79ed94UL, 0x98d42b67d467ff98UL, 0xb0e87b23e82393b0UL, 0x854a11de4ade5b85UL,
+	0xbb6b6dbd6bbd06bbUL, 0xc52a917e2a7ebbc5UL, 0x4fe59e34e5347b4fUL, 0xed16c13a163ad7edUL,
+	0x86c51754c554d286UL, 0x9ad72f62d762f89aUL, 0x6655ccff55ff9966UL, 0x119422a794a7b611UL,
+	0x8acf0f4acf4ac08aUL, 0xe910c9301030d9e9UL, 0x0406080a060a0e04UL, 0xfe81e798819866feUL,
+	0xa0f05b0bf00baba0UL, 0x7844f0cc44ccb478UL, 0x25ba4ad5bad5f025UL, 0x4be3963ee33e754bUL,
+	0xa2f35f0ef30eaca2UL, 0x5dfeba19fe19445dUL, 0x80c01b5bc05bdb80UL, 0x058a0a858a858005UL,
+	0x3fad7eecadecd33fUL, 0x21bc42dfbcdffe21UL, 0x7048e0d848d8a870UL, 0xf104f90c040cfdf1UL,
+	0x63dfc67adf7a1963UL, 0x77c1ee58c1582f77UL, 0xaf75459f759f30afUL, 0x426384a563a5e742UL,
+	0x2030405030507020UL, 0xe51ad12e1a2ecbe5UL, 0xfd0ee1120e12effdUL, 0xbf6d65b76db708bfUL,
+	0x814c19d44cd45581UL, 0x1814303c143c2418UL, 0x26354c5f355f7926UL, 0xc32f9d712f71b2c3UL,
+	0xbee16738e13886beUL, 0x35a26afda2fdc835UL, 0x88cc0b4fcc4fc788UL, 0x2e395c4b394b652eUL,
+	0x93573df957f96a93UL, 0x55f2aa0df20d5855UL, 0xfc82e39d829d61fcUL, 0x7a47f4c947c9b37aUL,
+	0xc8ac8befacef27c8UL, 0xbae76f32e73288baUL, 0x322b647d2b7d4f32UL, 0xe695d7a495a442e6UL,
+	0xc0a09bfba0fb3bc0UL, 0x199832b398b3aa19UL, 0x9ed12768d168f69eUL, 0xa37f5d817f8122a3UL,
+	0x446688aa66aaee44UL, 0x547ea8827e82d654UL, 0x3bab76e6abe6dd3bUL, 0x0b83169e839e950bUL,
+	0x8cca0345ca45c98cUL, 0xc729957b297bbcc7UL, 0x6bd3d66ed36e056bUL, 0x283c50443c446c28UL,
+	0xa779558b798b2ca7UL, 0xbce2633de23d81bcUL, 0x161d2c271d273116UL, 0xad76419a769a37adUL,
+	0xdb3bad4d3b4d96dbUL, 0x6456c8fa56fa9e64UL, 0x744ee8d24ed2a674UL, 0x141e28221e223614UL,
+	0x92db3f76db76e492UL, 0x0c0a181e0a1e120cUL, 0x486c90b46cb4fc48UL, 0xb8e46b37e4378fb8UL,
+	0x9f5d25e75de7789fUL, 0xbd6e61b26eb20fbdUL, 0x43ef862aef2a6943UL, 0xc4a693f1a6f135c4UL,
+	0x39a872e3a8e3da39UL, 0x31a462f7a4f7c631UL, 0xd337bd5937598ad3UL, 0xf28bff868b8674f2UL,
+	0xd532b156325683d5UL, 0x8b430dc543c54e8bUL, 0x6e59dceb59eb856eUL, 0xdab7afc2b7c218daUL,
+	0x018c028f8c8f8e01UL, 0xb16479ac64ac1db1UL, 0x9cd2236dd26df19cUL, 0x49e0923be03b7249UL,
+	0xd8b4abc7b4c71fd8UL, 0xacfa4315fa15b9acUL, 0xf307fd090709faf3UL, 0xcf25856f256fa0cfUL,
+	0xcaaf8feaafea20caUL, 0xf48ef3898e897df4UL, 0x47e98e20e9206747UL, 0x1018202818283810UL,
+	0x6fd5de64d5640b6fUL, 0xf088fb83888373f0UL, 0x4a6f94b16fb1fb4aUL, 0x5c72b8967296ca5cUL,
+	0x3824706c246c5438UL, 0x57f1ae08f1085f57UL, 0x73c7e652c7522173UL, 0x975135f351f36497UL,
+	0xcb238d652365aecbUL, 0xa17c59847c8425a1UL, 0xe89ccbbf9cbf57e8UL, 0x3e217c6321635d3eUL,
+	0x96dd377cdd7cea96UL, 0x61dcc27fdc7f1e61UL, 0x0d861a9186919c0dUL, 0x0f851e9485949b0fUL,
+	0xe090dbab90ab4be0UL, 0x7c42f8c642c6ba7cUL, 0x71c4e257c4572671UL, 0xccaa83e5aae529ccUL,
+	0x90d83b73d873e390UL, 0x06050c0f050f0906UL, 0xf701f5030103f4f7UL, 0x1c12383612362a1cUL,
+	0xc2a39ffea3fe3cc2UL, 0x6a5fd4e15fe18b6aUL, 0xaef94710f910beaeUL, 0x69d0d26bd06b0269UL,
+	0x17912ea891a8bf17UL, 0x995829e858e87199UL, 0x3a2774692769533aUL, 0x27b94ed0b9d0f727UL,
+	0xd938a948384891d9UL, 0xeb13cd351335deebUL, 0x2bb356ceb3cee52bUL, 0x2233445533557722UL,
+	0xd2bbbfd6bbd604d2UL, 0xa9704990709039a9UL, 0x07890e8089808707UL, 0x33a766f2a7f2c133UL,
+	0x2db65ac1b6c1ec2dUL, 0x3c22786622665a3cUL, 0x15922aad92adb815UL, 0xc92089602060a9c9UL,
+	0x874915db49db5c87UL, 0xaaff4f1aff1ab0aaUL, 0x5078a0887888d850UL, 0xa57a518e7a8e2ba5UL,
+	0x038f068a8f8a8903UL, 0x59f8b213f8134a59UL, 0x0980129b809b9209UL, 0x1a1734391739231aUL,
+	0x65daca75da751065UL, 0xd731b553315384d7UL, 0x84c61351c651d584UL, 0xd0b8bbd3b8d303d0UL,
+	0x82c31f5ec35edc82UL, 0x29b052cbb0cbe229UL, 0x5a77b4997799c35aUL, 0x1e113c3311332d1eUL,
 	0x7bcbf646cb463d7bUL, 0xa8fc4b1ffc1fb7a8UL, 0x6dd6da61d6610c6dUL, 0x2c3a584e3a4e622cUL
 };
 
@@ -129,69 +129,69 @@ R"===(
 
 static const __constant ulong T4_G[] =
 {
-	0xA5F432C6C6A597F4UL, 0x84976FF8F884EB97UL, 0x99B05EEEEE99C7B0UL, 0x8D8C7AF6F68DF78CUL, 
-	0x0D17E8FFFF0DE517UL, 0xBDDC0AD6D6BDB7DCUL, 0xB1C816DEDEB1A7C8UL, 0x54FC6D91915439FCUL, 
-	0x50F090606050C0F0UL, 0x0305070202030405UL, 0xA9E02ECECEA987E0UL, 0x7D87D156567DAC87UL, 
-	0x192BCCE7E719D52BUL, 0x62A613B5B56271A6UL, 0xE6317C4D4DE69A31UL, 0x9AB559ECEC9AC3B5UL, 
-	0x45CF408F8F4505CFUL, 0x9DBCA31F1F9D3EBCUL, 0x40C04989894009C0UL, 0x879268FAFA87EF92UL, 
-	0x153FD0EFEF15C53FUL, 0xEB2694B2B2EB7F26UL, 0xC940CE8E8EC90740UL, 0x0B1DE6FBFB0BED1DUL, 
-	0xEC2F6E4141EC822FUL, 0x67A91AB3B3677DA9UL, 0xFD1C435F5FFDBE1CUL, 0xEA25604545EA8A25UL, 
-	0xBFDAF92323BF46DAUL, 0xF702515353F7A602UL, 0x96A145E4E496D3A1UL, 0x5BED769B9B5B2DEDUL, 
-	0xC25D287575C2EA5DUL, 0x1C24C5E1E11CD924UL, 0xAEE9D43D3DAE7AE9UL, 0x6ABEF24C4C6A98BEUL, 
-	0x5AEE826C6C5AD8EEUL, 0x41C3BD7E7E41FCC3UL, 0x0206F3F5F502F106UL, 0x4FD15283834F1DD1UL, 
-	0x5CE48C68685CD0E4UL, 0xF407565151F4A207UL, 0x345C8DD1D134B95CUL, 0x0818E1F9F908E918UL, 
-	0x93AE4CE2E293DFAEUL, 0x73953EABAB734D95UL, 0x53F597626253C4F5UL, 0x3F416B2A2A3F5441UL, 
-	0x0C141C08080C1014UL, 0x52F66395955231F6UL, 0x65AFE94646658CAFUL, 0x5EE27F9D9D5E21E2UL, 
-	0x2878483030286078UL, 0xA1F8CF3737A16EF8UL, 0x0F111B0A0A0F1411UL, 0xB5C4EB2F2FB55EC4UL, 
-	0x091B150E0E091C1BUL, 0x365A7E242436485AUL, 0x9BB6AD1B1B9B36B6UL, 0x3D4798DFDF3DA547UL, 
-	0x266AA7CDCD26816AUL, 0x69BBF54E4E699CBBUL, 0xCD4C337F7FCDFE4CUL, 0x9FBA50EAEA9FCFBAUL, 
-	0x1B2D3F12121B242DUL, 0x9EB9A41D1D9E3AB9UL, 0x749CC4585874B09CUL, 0x2E724634342E6872UL, 
-	0x2D774136362D6C77UL, 0xB2CD11DCDCB2A3CDUL, 0xEE299DB4B4EE7329UL, 0xFB164D5B5BFBB616UL, 
-	0xF601A5A4A4F65301UL, 0x4DD7A176764DECD7UL, 0x61A314B7B76175A3UL, 0xCE49347D7DCEFA49UL, 
-	0x7B8DDF52527BA48DUL, 0x3E429FDDDD3EA142UL, 0x7193CD5E5E71BC93UL, 0x97A2B113139726A2UL, 
-	0xF504A2A6A6F55704UL, 0x68B801B9B96869B8UL, 0x0000000000000000UL, 0x2C74B5C1C12C9974UL, 
-	0x60A0E040406080A0UL, 0x1F21C2E3E31FDD21UL, 0xC8433A7979C8F243UL, 0xED2C9AB6B6ED772CUL, 
-	0xBED90DD4D4BEB3D9UL, 0x46CA478D8D4601CAUL, 0xD970176767D9CE70UL, 0x4BDDAF72724BE4DDUL, 
-	0xDE79ED9494DE3379UL, 0xD467FF9898D42B67UL, 0xE82393B0B0E87B23UL, 0x4ADE5B85854A11DEUL, 
-	0x6BBD06BBBB6B6DBDUL, 0x2A7EBBC5C52A917EUL, 0xE5347B4F4FE59E34UL, 0x163AD7EDED16C13AUL, 
-	0xC554D28686C51754UL, 0xD762F89A9AD72F62UL, 0x55FF99666655CCFFUL, 0x94A7B611119422A7UL, 
-	0xCF4AC08A8ACF0F4AUL, 0x1030D9E9E910C930UL, 0x060A0E040406080AUL, 0x819866FEFE81E798UL, 
-	0xF00BABA0A0F05B0BUL, 0x44CCB4787844F0CCUL, 0xBAD5F02525BA4AD5UL, 0xE33E754B4BE3963EUL, 
-	0xF30EACA2A2F35F0EUL, 0xFE19445D5DFEBA19UL, 0xC05BDB8080C01B5BUL, 0x8A858005058A0A85UL, 
-	0xADECD33F3FAD7EECUL, 0xBCDFFE2121BC42DFUL, 0x48D8A8707048E0D8UL, 0x040CFDF1F104F90CUL, 
-	0xDF7A196363DFC67AUL, 0xC1582F7777C1EE58UL, 0x759F30AFAF75459FUL, 0x63A5E742426384A5UL, 
-	0x3050702020304050UL, 0x1A2ECBE5E51AD12EUL, 0x0E12EFFDFD0EE112UL, 0x6DB708BFBF6D65B7UL, 
-	0x4CD45581814C19D4UL, 0x143C24181814303CUL, 0x355F792626354C5FUL, 0x2F71B2C3C32F9D71UL, 
-	0xE13886BEBEE16738UL, 0xA2FDC83535A26AFDUL, 0xCC4FC78888CC0B4FUL, 0x394B652E2E395C4BUL, 
-	0x57F96A9393573DF9UL, 0xF20D585555F2AA0DUL, 0x829D61FCFC82E39DUL, 0x47C9B37A7A47F4C9UL, 
-	0xACEF27C8C8AC8BEFUL, 0xE73288BABAE76F32UL, 0x2B7D4F32322B647DUL, 0x95A442E6E695D7A4UL, 
-	0xA0FB3BC0C0A09BFBUL, 0x98B3AA19199832B3UL, 0xD168F69E9ED12768UL, 0x7F8122A3A37F5D81UL, 
-	0x66AAEE44446688AAUL, 0x7E82D654547EA882UL, 0xABE6DD3B3BAB76E6UL, 0x839E950B0B83169EUL, 
-	0xCA45C98C8CCA0345UL, 0x297BBCC7C729957BUL, 0xD36E056B6BD3D66EUL, 0x3C446C28283C5044UL, 
-	0x798B2CA7A779558BUL, 0xE23D81BCBCE2633DUL, 0x1D273116161D2C27UL, 0x769A37ADAD76419AUL, 
-	0x3B4D96DBDB3BAD4DUL, 0x56FA9E646456C8FAUL, 0x4ED2A674744EE8D2UL, 0x1E223614141E2822UL, 
-	0xDB76E49292DB3F76UL, 0x0A1E120C0C0A181EUL, 0x6CB4FC48486C90B4UL, 0xE4378FB8B8E46B37UL, 
-	0x5DE7789F9F5D25E7UL, 0x6EB20FBDBD6E61B2UL, 0xEF2A694343EF862AUL, 0xA6F135C4C4A693F1UL, 
-	0xA8E3DA3939A872E3UL, 0xA4F7C63131A462F7UL, 0x37598AD3D337BD59UL, 0x8B8674F2F28BFF86UL, 
-	0x325683D5D532B156UL, 0x43C54E8B8B430DC5UL, 0x59EB856E6E59DCEBUL, 0xB7C218DADAB7AFC2UL, 
-	0x8C8F8E01018C028FUL, 0x64AC1DB1B16479ACUL, 0xD26DF19C9CD2236DUL, 0xE03B724949E0923BUL, 
-	0xB4C71FD8D8B4ABC7UL, 0xFA15B9ACACFA4315UL, 0x0709FAF3F307FD09UL, 0x256FA0CFCF25856FUL, 
-	0xAFEA20CACAAF8FEAUL, 0x8E897DF4F48EF389UL, 0xE920674747E98E20UL, 0x1828381010182028UL, 
-	0xD5640B6F6FD5DE64UL, 0x888373F0F088FB83UL, 0x6FB1FB4A4A6F94B1UL, 0x7296CA5C5C72B896UL, 
-	0x246C54383824706CUL, 0xF1085F5757F1AE08UL, 0xC752217373C7E652UL, 0x51F36497975135F3UL, 
-	0x2365AECBCB238D65UL, 0x7C8425A1A17C5984UL, 0x9CBF57E8E89CCBBFUL, 0x21635D3E3E217C63UL, 
-	0xDD7CEA9696DD377CUL, 0xDC7F1E6161DCC27FUL, 0x86919C0D0D861A91UL, 0x85949B0F0F851E94UL, 
-	0x90AB4BE0E090DBABUL, 0x42C6BA7C7C42F8C6UL, 0xC457267171C4E257UL, 0xAAE529CCCCAA83E5UL, 
-	0xD873E39090D83B73UL, 0x050F090606050C0FUL, 0x0103F4F7F701F503UL, 0x12362A1C1C123836UL, 
-	0xA3FE3CC2C2A39FFEUL, 0x5FE18B6A6A5FD4E1UL, 0xF910BEAEAEF94710UL, 0xD06B026969D0D26BUL, 
-	0x91A8BF1717912EA8UL, 0x58E87199995829E8UL, 0x2769533A3A277469UL, 0xB9D0F72727B94ED0UL, 
-	0x384891D9D938A948UL, 0x1335DEEBEB13CD35UL, 0xB3CEE52B2BB356CEUL, 0x3355772222334455UL, 
-	0xBBD604D2D2BBBFD6UL, 0x709039A9A9704990UL, 0x8980870707890E80UL, 0xA7F2C13333A766F2UL, 
-	0xB6C1EC2D2DB65AC1UL, 0x22665A3C3C227866UL, 0x92ADB81515922AADUL, 0x2060A9C9C9208960UL, 
-	0x49DB5C87874915DBUL, 0xFF1AB0AAAAFF4F1AUL, 0x7888D8505078A088UL, 0x7A8E2BA5A57A518EUL, 
-	0x8F8A8903038F068AUL, 0xF8134A5959F8B213UL, 0x809B92090980129BUL, 0x1739231A1A173439UL, 
-	0xDA75106565DACA75UL, 0x315384D7D731B553UL, 0xC651D58484C61351UL, 0xB8D303D0D0B8BBD3UL, 
-	0xC35EDC8282C31F5EUL, 0xB0CBE22929B052CBUL, 0x7799C35A5A77B499UL, 0x11332D1E1E113C33UL, 
+	0xA5F432C6C6A597F4UL, 0x84976FF8F884EB97UL, 0x99B05EEEEE99C7B0UL, 0x8D8C7AF6F68DF78CUL,
+	0x0D17E8FFFF0DE517UL, 0xBDDC0AD6D6BDB7DCUL, 0xB1C816DEDEB1A7C8UL, 0x54FC6D91915439FCUL,
+	0x50F090606050C0F0UL, 0x0305070202030405UL, 0xA9E02ECECEA987E0UL, 0x7D87D156567DAC87UL,
+	0x192BCCE7E719D52BUL, 0x62A613B5B56271A6UL, 0xE6317C4D4DE69A31UL, 0x9AB559ECEC9AC3B5UL,
+	0x45CF408F8F4505CFUL, 0x9DBCA31F1F9D3EBCUL, 0x40C04989894009C0UL, 0x879268FAFA87EF92UL,
+	0x153FD0EFEF15C53FUL, 0xEB2694B2B2EB7F26UL, 0xC940CE8E8EC90740UL, 0x0B1DE6FBFB0BED1DUL,
+	0xEC2F6E4141EC822FUL, 0x67A91AB3B3677DA9UL, 0xFD1C435F5FFDBE1CUL, 0xEA25604545EA8A25UL,
+	0xBFDAF92323BF46DAUL, 0xF702515353F7A602UL, 0x96A145E4E496D3A1UL, 0x5BED769B9B5B2DEDUL,
+	0xC25D287575C2EA5DUL, 0x1C24C5E1E11CD924UL, 0xAEE9D43D3DAE7AE9UL, 0x6ABEF24C4C6A98BEUL,
+	0x5AEE826C6C5AD8EEUL, 0x41C3BD7E7E41FCC3UL, 0x0206F3F5F502F106UL, 0x4FD15283834F1DD1UL,
+	0x5CE48C68685CD0E4UL, 0xF407565151F4A207UL, 0x345C8DD1D134B95CUL, 0x0818E1F9F908E918UL,
+	0x93AE4CE2E293DFAEUL, 0x73953EABAB734D95UL, 0x53F597626253C4F5UL, 0x3F416B2A2A3F5441UL,
+	0x0C141C08080C1014UL, 0x52F66395955231F6UL, 0x65AFE94646658CAFUL, 0x5EE27F9D9D5E21E2UL,
+	0x2878483030286078UL, 0xA1F8CF3737A16EF8UL, 0x0F111B0A0A0F1411UL, 0xB5C4EB2F2FB55EC4UL,
+	0x091B150E0E091C1BUL, 0x365A7E242436485AUL, 0x9BB6AD1B1B9B36B6UL, 0x3D4798DFDF3DA547UL,
+	0x266AA7CDCD26816AUL, 0x69BBF54E4E699CBBUL, 0xCD4C337F7FCDFE4CUL, 0x9FBA50EAEA9FCFBAUL,
+	0x1B2D3F12121B242DUL, 0x9EB9A41D1D9E3AB9UL, 0x749CC4585874B09CUL, 0x2E724634342E6872UL,
+	0x2D774136362D6C77UL, 0xB2CD11DCDCB2A3CDUL, 0xEE299DB4B4EE7329UL, 0xFB164D5B5BFBB616UL,
+	0xF601A5A4A4F65301UL, 0x4DD7A176764DECD7UL, 0x61A314B7B76175A3UL, 0xCE49347D7DCEFA49UL,
+	0x7B8DDF52527BA48DUL, 0x3E429FDDDD3EA142UL, 0x7193CD5E5E71BC93UL, 0x97A2B113139726A2UL,
+	0xF504A2A6A6F55704UL, 0x68B801B9B96869B8UL, 0x0000000000000000UL, 0x2C74B5C1C12C9974UL,
+	0x60A0E040406080A0UL, 0x1F21C2E3E31FDD21UL, 0xC8433A7979C8F243UL, 0xED2C9AB6B6ED772CUL,
+	0xBED90DD4D4BEB3D9UL, 0x46CA478D8D4601CAUL, 0xD970176767D9CE70UL, 0x4BDDAF72724BE4DDUL,
+	0xDE79ED9494DE3379UL, 0xD467FF9898D42B67UL, 0xE82393B0B0E87B23UL, 0x4ADE5B85854A11DEUL,
+	0x6BBD06BBBB6B6DBDUL, 0x2A7EBBC5C52A917EUL, 0xE5347B4F4FE59E34UL, 0x163AD7EDED16C13AUL,
+	0xC554D28686C51754UL, 0xD762F89A9AD72F62UL, 0x55FF99666655CCFFUL, 0x94A7B611119422A7UL,
+	0xCF4AC08A8ACF0F4AUL, 0x1030D9E9E910C930UL, 0x060A0E040406080AUL, 0x819866FEFE81E798UL,
+	0xF00BABA0A0F05B0BUL, 0x44CCB4787844F0CCUL, 0xBAD5F02525BA4AD5UL, 0xE33E754B4BE3963EUL,
+	0xF30EACA2A2F35F0EUL, 0xFE19445D5DFEBA19UL, 0xC05BDB8080C01B5BUL, 0x8A858005058A0A85UL,
+	0xADECD33F3FAD7EECUL, 0xBCDFFE2121BC42DFUL, 0x48D8A8707048E0D8UL, 0x040CFDF1F104F90CUL,
+	0xDF7A196363DFC67AUL, 0xC1582F7777C1EE58UL, 0x759F30AFAF75459FUL, 0x63A5E742426384A5UL,
+	0x3050702020304050UL, 0x1A2ECBE5E51AD12EUL, 0x0E12EFFDFD0EE112UL, 0x6DB708BFBF6D65B7UL,
+	0x4CD45581814C19D4UL, 0x143C24181814303CUL, 0x355F792626354C5FUL, 0x2F71B2C3C32F9D71UL,
+	0xE13886BEBEE16738UL, 0xA2FDC83535A26AFDUL, 0xCC4FC78888CC0B4FUL, 0x394B652E2E395C4BUL,
+	0x57F96A9393573DF9UL, 0xF20D585555F2AA0DUL, 0x829D61FCFC82E39DUL, 0x47C9B37A7A47F4C9UL,
+	0xACEF27C8C8AC8BEFUL, 0xE73288BABAE76F32UL, 0x2B7D4F32322B647DUL, 0x95A442E6E695D7A4UL,
+	0xA0FB3BC0C0A09BFBUL, 0x98B3AA19199832B3UL, 0xD168F69E9ED12768UL, 0x7F8122A3A37F5D81UL,
+	0x66AAEE44446688AAUL, 0x7E82D654547EA882UL, 0xABE6DD3B3BAB76E6UL, 0x839E950B0B83169EUL,
+	0xCA45C98C8CCA0345UL, 0x297BBCC7C729957BUL, 0xD36E056B6BD3D66EUL, 0x3C446C28283C5044UL,
+	0x798B2CA7A779558BUL, 0xE23D81BCBCE2633DUL, 0x1D273116161D2C27UL, 0x769A37ADAD76419AUL,
+	0x3B4D96DBDB3BAD4DUL, 0x56FA9E646456C8FAUL, 0x4ED2A674744EE8D2UL, 0x1E223614141E2822UL,
+	0xDB76E49292DB3F76UL, 0x0A1E120C0C0A181EUL, 0x6CB4FC48486C90B4UL, 0xE4378FB8B8E46B37UL,
+	0x5DE7789F9F5D25E7UL, 0x6EB20FBDBD6E61B2UL, 0xEF2A694343EF862AUL, 0xA6F135C4C4A693F1UL,
+	0xA8E3DA3939A872E3UL, 0xA4F7C63131A462F7UL, 0x37598AD3D337BD59UL, 0x8B8674F2F28BFF86UL,
+	0x325683D5D532B156UL, 0x43C54E8B8B430DC5UL, 0x59EB856E6E59DCEBUL, 0xB7C218DADAB7AFC2UL,
+	0x8C8F8E01018C028FUL, 0x64AC1DB1B16479ACUL, 0xD26DF19C9CD2236DUL, 0xE03B724949E0923BUL,
+	0xB4C71FD8D8B4ABC7UL, 0xFA15B9ACACFA4315UL, 0x0709FAF3F307FD09UL, 0x256FA0CFCF25856FUL,
+	0xAFEA20CACAAF8FEAUL, 0x8E897DF4F48EF389UL, 0xE920674747E98E20UL, 0x1828381010182028UL,
+	0xD5640B6F6FD5DE64UL, 0x888373F0F088FB83UL, 0x6FB1FB4A4A6F94B1UL, 0x7296CA5C5C72B896UL,
+	0x246C54383824706CUL, 0xF1085F5757F1AE08UL, 0xC752217373C7E652UL, 0x51F36497975135F3UL,
+	0x2365AECBCB238D65UL, 0x7C8425A1A17C5984UL, 0x9CBF57E8E89CCBBFUL, 0x21635D3E3E217C63UL,
+	0xDD7CEA9696DD377CUL, 0xDC7F1E6161DCC27FUL, 0x86919C0D0D861A91UL, 0x85949B0F0F851E94UL,
+	0x90AB4BE0E090DBABUL, 0x42C6BA7C7C42F8C6UL, 0xC457267171C4E257UL, 0xAAE529CCCCAA83E5UL,
+	0xD873E39090D83B73UL, 0x050F090606050C0FUL, 0x0103F4F7F701F503UL, 0x12362A1C1C123836UL,
+	0xA3FE3CC2C2A39FFEUL, 0x5FE18B6A6A5FD4E1UL, 0xF910BEAEAEF94710UL, 0xD06B026969D0D26BUL,
+	0x91A8BF1717912EA8UL, 0x58E87199995829E8UL, 0x2769533A3A277469UL, 0xB9D0F72727B94ED0UL,
+	0x384891D9D938A948UL, 0x1335DEEBEB13CD35UL, 0xB3CEE52B2BB356CEUL, 0x3355772222334455UL,
+	0xBBD604D2D2BBBFD6UL, 0x709039A9A9704990UL, 0x8980870707890E80UL, 0xA7F2C13333A766F2UL,
+	0xB6C1EC2D2DB65AC1UL, 0x22665A3C3C227866UL, 0x92ADB81515922AADUL, 0x2060A9C9C9208960UL,
+	0x49DB5C87874915DBUL, 0xFF1AB0AAAAFF4F1AUL, 0x7888D8505078A088UL, 0x7A8E2BA5A57A518EUL,
+	0x8F8A8903038F068AUL, 0xF8134A5959F8B213UL, 0x809B92090980129BUL, 0x1739231A1A173439UL,
+	0xDA75106565DACA75UL, 0x315384D7D731B553UL, 0xC651D58484C61351UL, 0xB8D303D0D0B8BBD3UL,
+	0xC35EDC8282C31F5EUL, 0xB0CBE22929B052CBUL, 0x7799C35A5A77B499UL, 0x11332D1E1E113C33UL,
 	0xCB463D7B7BCBF646UL, 0xFC1FB7A8A8FC4B1FUL, 0xD6610C6D6DD6DA61UL, 0x3A4E622C2C3A584EUL
 };
 
@@ -292,4 +292,4 @@ static const __constant ulong T4_G[] =
 		} while (0)
 
 )==="
-		
-\ No newline at end of file
+
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/jh.cl b/xmrstak/backend/amd/amd_gpu/opencl/jh.cl
index fe70ea3..486d232 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/jh.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/jh.cl
@@ -6,7 +6,7 @@ R"===(
  * ==========================(LICENSE BEGIN)============================
  *
  * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -14,10 +14,10 @@ R"===(
  * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice shall be
  * included in all copies or substantial portions of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -106,47 +106,47 @@ typedef ulong sph_u64;
 
 static const __constant ulong C[] =
 {
-	0x67F815DFA2DED572UL, 0x571523B70A15847BUL, 0xF6875A4D90D6AB81UL, 0x402BD1C3C54F9F4EUL, 
-	0x9CFA455CE03A98EAUL, 0x9A99B26699D2C503UL, 0x8A53BBF2B4960266UL, 0x31A2DB881A1456B5UL, 
-	0xDB0E199A5C5AA303UL, 0x1044C1870AB23F40UL, 0x1D959E848019051CUL, 0xDCCDE75EADEB336FUL, 
-	0x416BBF029213BA10UL, 0xD027BBF7156578DCUL, 0x5078AA3739812C0AUL, 0xD3910041D2BF1A3FUL, 
-	0x907ECCF60D5A2D42UL, 0xCE97C0929C9F62DDUL, 0xAC442BC70BA75C18UL, 0x23FCC663D665DFD1UL, 
-	0x1AB8E09E036C6E97UL, 0xA8EC6C447E450521UL, 0xFA618E5DBB03F1EEUL, 0x97818394B29796FDUL, 
-	0x2F3003DB37858E4AUL, 0x956A9FFB2D8D672AUL, 0x6C69B8F88173FE8AUL, 0x14427FC04672C78AUL, 
-	0xC45EC7BD8F15F4C5UL, 0x80BB118FA76F4475UL, 0xBC88E4AEB775DE52UL, 0xF4A3A6981E00B882UL, 
-	0x1563A3A9338FF48EUL, 0x89F9B7D524565FAAUL, 0xFDE05A7C20EDF1B6UL, 0x362C42065AE9CA36UL, 
-	0x3D98FE4E433529CEUL, 0xA74B9A7374F93A53UL, 0x86814E6F591FF5D0UL, 0x9F5AD8AF81AD9D0EUL, 
-	0x6A6234EE670605A7UL, 0x2717B96EBE280B8BUL, 0x3F1080C626077447UL, 0x7B487EC66F7EA0E0UL, 
-	0xC0A4F84AA50A550DUL, 0x9EF18E979FE7E391UL, 0xD48D605081727686UL, 0x62B0E5F3415A9E7EUL, 
-	0x7A205440EC1F9FFCUL, 0x84C9F4CE001AE4E3UL, 0xD895FA9DF594D74FUL, 0xA554C324117E2E55UL, 
-	0x286EFEBD2872DF5BUL, 0xB2C4A50FE27FF578UL, 0x2ED349EEEF7C8905UL, 0x7F5928EB85937E44UL, 
-	0x4A3124B337695F70UL, 0x65E4D61DF128865EUL, 0xE720B95104771BC7UL, 0x8A87D423E843FE74UL, 
-	0xF2947692A3E8297DUL, 0xC1D9309B097ACBDDUL, 0xE01BDC5BFB301B1DUL, 0xBF829CF24F4924DAUL, 
-	0xFFBF70B431BAE7A4UL, 0x48BCF8DE0544320DUL, 0x39D3BB5332FCAE3BUL, 0xA08B29E0C1C39F45UL, 
-	0x0F09AEF7FD05C9E5UL, 0x34F1904212347094UL, 0x95ED44E301B771A2UL, 0x4A982F4F368E3BE9UL, 
-	0x15F66CA0631D4088UL, 0xFFAF52874B44C147UL, 0x30C60AE2F14ABB7EUL, 0xE68C6ECCC5B67046UL, 
-	0x00CA4FBD56A4D5A4UL, 0xAE183EC84B849DDAUL, 0xADD1643045CE5773UL, 0x67255C1468CEA6E8UL, 
-	0x16E10ECBF28CDAA3UL, 0x9A99949A5806E933UL, 0x7B846FC220B2601FUL, 0x1885D1A07FACCED1UL, 
-	0xD319DD8DA15B5932UL, 0x46B4A5AAC01C9A50UL, 0xBA6B04E467633D9FUL, 0x7EEE560BAB19CAF6UL, 
-	0x742128A9EA79B11FUL, 0xEE51363B35F7BDE9UL, 0x76D350755AAC571DUL, 0x01707DA3FEC2463AUL, 
-	0x42D8A498AFC135F7UL, 0x79676B9E20ECED78UL, 0xA8DB3AEA15638341UL, 0x832C83324D3BC3FAUL, 
-	0xF347271C1F3B40A7UL, 0x9A762DB734F04059UL, 0xFD4F21D26C4E3EE7UL, 0xEF5957DC398DFDB8UL, 
-	0xDAEB492B490C9B8DUL, 0x0D70F36849D7A25BUL, 0x84558D7AD0AE3B7DUL, 0x658EF8E4F0E9A5F5UL, 
-	0x533B1036F4A2B8A0UL, 0x5AEC3E759E07A80CUL, 0x4F88E85692946891UL, 0x4CBCBAF8555CB05BUL, 
-	0x7B9487F3993BBBE3UL, 0x5D1C6B72D6F4DA75UL, 0x6DB334DC28ACAE64UL, 0x71DB28B850A5346CUL, 
-	0x2A518D10F2E261F8UL, 0xFC75DD593364DBE3UL, 0xA23FCE43F1BCAC1CUL, 0xB043E8023CD1BB67UL, 
-	0x75A12988CA5B0A33UL, 0x5C5316B44D19347FUL, 0x1E4D790EC3943B92UL, 0x3FAFEEB6D7757479UL, 
-	0x21391ABEF7D4A8EAUL, 0x5127234C097EF45CUL, 0xD23C32BA5324A326UL, 0xADD5A66D4A17A344UL, 
-	0x08C9F2AFA63E1DB5UL, 0x563C6B91983D5983UL, 0x4D608672A17CF84CUL, 0xF6C76E08CC3EE246UL, 
-	0x5E76BCB1B333982FUL, 0x2AE6C4EFA566D62BUL, 0x36D4C1BEE8B6F406UL, 0x6321EFBC1582EE74UL, 
-	0x69C953F40D4EC1FDUL, 0x26585806C45A7DA7UL, 0x16FAE0061614C17EUL, 0x3F9D63283DAF907EUL, 
-	0x0CD29B00E3F2C9D2UL, 0x300CD4B730CEAA5FUL, 0x9832E0F216512A74UL, 0x9AF8CEE3D830EB0DUL, 
-	0x9279F1B57B9EC54BUL, 0xD36886046EE651FFUL, 0x316796E6574D239BUL, 0x05750A17F3A6E6CCUL, 
-	0xCE6C3213D98176B1UL, 0x62A205F88452173CUL, 0x47154778B3CB2BF4UL, 0x486A9323825446FFUL, 
-	0x65655E4E0758DF38UL, 0x8E5086FC897CFCF2UL, 0x86CA0BD0442E7031UL, 0x4E477830A20940F0UL, 
-	0x8338F7D139EEA065UL, 0xBD3A2CE437E95EF7UL, 0x6FF8130126B29721UL, 0xE7DE9FEFD1ED44A3UL, 
-	0xD992257615DFA08BUL, 0xBE42DC12F6F7853CUL, 0x7EB027AB7CECA7D8UL, 0xDEA83EAADA7D8D53UL, 
-	0xD86902BD93CE25AAUL, 0xF908731AFD43F65AUL, 0xA5194A17DAEF5FC0UL, 0x6A21FD4C33664D97UL, 
+	0x67F815DFA2DED572UL, 0x571523B70A15847BUL, 0xF6875A4D90D6AB81UL, 0x402BD1C3C54F9F4EUL,
+	0x9CFA455CE03A98EAUL, 0x9A99B26699D2C503UL, 0x8A53BBF2B4960266UL, 0x31A2DB881A1456B5UL,
+	0xDB0E199A5C5AA303UL, 0x1044C1870AB23F40UL, 0x1D959E848019051CUL, 0xDCCDE75EADEB336FUL,
+	0x416BBF029213BA10UL, 0xD027BBF7156578DCUL, 0x5078AA3739812C0AUL, 0xD3910041D2BF1A3FUL,
+	0x907ECCF60D5A2D42UL, 0xCE97C0929C9F62DDUL, 0xAC442BC70BA75C18UL, 0x23FCC663D665DFD1UL,
+	0x1AB8E09E036C6E97UL, 0xA8EC6C447E450521UL, 0xFA618E5DBB03F1EEUL, 0x97818394B29796FDUL,
+	0x2F3003DB37858E4AUL, 0x956A9FFB2D8D672AUL, 0x6C69B8F88173FE8AUL, 0x14427FC04672C78AUL,
+	0xC45EC7BD8F15F4C5UL, 0x80BB118FA76F4475UL, 0xBC88E4AEB775DE52UL, 0xF4A3A6981E00B882UL,
+	0x1563A3A9338FF48EUL, 0x89F9B7D524565FAAUL, 0xFDE05A7C20EDF1B6UL, 0x362C42065AE9CA36UL,
+	0x3D98FE4E433529CEUL, 0xA74B9A7374F93A53UL, 0x86814E6F591FF5D0UL, 0x9F5AD8AF81AD9D0EUL,
+	0x6A6234EE670605A7UL, 0x2717B96EBE280B8BUL, 0x3F1080C626077447UL, 0x7B487EC66F7EA0E0UL,
+	0xC0A4F84AA50A550DUL, 0x9EF18E979FE7E391UL, 0xD48D605081727686UL, 0x62B0E5F3415A9E7EUL,
+	0x7A205440EC1F9FFCUL, 0x84C9F4CE001AE4E3UL, 0xD895FA9DF594D74FUL, 0xA554C324117E2E55UL,
+	0x286EFEBD2872DF5BUL, 0xB2C4A50FE27FF578UL, 0x2ED349EEEF7C8905UL, 0x7F5928EB85937E44UL,
+	0x4A3124B337695F70UL, 0x65E4D61DF128865EUL, 0xE720B95104771BC7UL, 0x8A87D423E843FE74UL,
+	0xF2947692A3E8297DUL, 0xC1D9309B097ACBDDUL, 0xE01BDC5BFB301B1DUL, 0xBF829CF24F4924DAUL,
+	0xFFBF70B431BAE7A4UL, 0x48BCF8DE0544320DUL, 0x39D3BB5332FCAE3BUL, 0xA08B29E0C1C39F45UL,
+	0x0F09AEF7FD05C9E5UL, 0x34F1904212347094UL, 0x95ED44E301B771A2UL, 0x4A982F4F368E3BE9UL,
+	0x15F66CA0631D4088UL, 0xFFAF52874B44C147UL, 0x30C60AE2F14ABB7EUL, 0xE68C6ECCC5B67046UL,
+	0x00CA4FBD56A4D5A4UL, 0xAE183EC84B849DDAUL, 0xADD1643045CE5773UL, 0x67255C1468CEA6E8UL,
+	0x16E10ECBF28CDAA3UL, 0x9A99949A5806E933UL, 0x7B846FC220B2601FUL, 0x1885D1A07FACCED1UL,
+	0xD319DD8DA15B5932UL, 0x46B4A5AAC01C9A50UL, 0xBA6B04E467633D9FUL, 0x7EEE560BAB19CAF6UL,
+	0x742128A9EA79B11FUL, 0xEE51363B35F7BDE9UL, 0x76D350755AAC571DUL, 0x01707DA3FEC2463AUL,
+	0x42D8A498AFC135F7UL, 0x79676B9E20ECED78UL, 0xA8DB3AEA15638341UL, 0x832C83324D3BC3FAUL,
+	0xF347271C1F3B40A7UL, 0x9A762DB734F04059UL, 0xFD4F21D26C4E3EE7UL, 0xEF5957DC398DFDB8UL,
+	0xDAEB492B490C9B8DUL, 0x0D70F36849D7A25BUL, 0x84558D7AD0AE3B7DUL, 0x658EF8E4F0E9A5F5UL,
+	0x533B1036F4A2B8A0UL, 0x5AEC3E759E07A80CUL, 0x4F88E85692946891UL, 0x4CBCBAF8555CB05BUL,
+	0x7B9487F3993BBBE3UL, 0x5D1C6B72D6F4DA75UL, 0x6DB334DC28ACAE64UL, 0x71DB28B850A5346CUL,
+	0x2A518D10F2E261F8UL, 0xFC75DD593364DBE3UL, 0xA23FCE43F1BCAC1CUL, 0xB043E8023CD1BB67UL,
+	0x75A12988CA5B0A33UL, 0x5C5316B44D19347FUL, 0x1E4D790EC3943B92UL, 0x3FAFEEB6D7757479UL,
+	0x21391ABEF7D4A8EAUL, 0x5127234C097EF45CUL, 0xD23C32BA5324A326UL, 0xADD5A66D4A17A344UL,
+	0x08C9F2AFA63E1DB5UL, 0x563C6B91983D5983UL, 0x4D608672A17CF84CUL, 0xF6C76E08CC3EE246UL,
+	0x5E76BCB1B333982FUL, 0x2AE6C4EFA566D62BUL, 0x36D4C1BEE8B6F406UL, 0x6321EFBC1582EE74UL,
+	0x69C953F40D4EC1FDUL, 0x26585806C45A7DA7UL, 0x16FAE0061614C17EUL, 0x3F9D63283DAF907EUL,
+	0x0CD29B00E3F2C9D2UL, 0x300CD4B730CEAA5FUL, 0x9832E0F216512A74UL, 0x9AF8CEE3D830EB0DUL,
+	0x9279F1B57B9EC54BUL, 0xD36886046EE651FFUL, 0x316796E6574D239BUL, 0x05750A17F3A6E6CCUL,
+	0xCE6C3213D98176B1UL, 0x62A205F88452173CUL, 0x47154778B3CB2BF4UL, 0x486A9323825446FFUL,
+	0x65655E4E0758DF38UL, 0x8E5086FC897CFCF2UL, 0x86CA0BD0442E7031UL, 0x4E477830A20940F0UL,
+	0x8338F7D139EEA065UL, 0xBD3A2CE437E95EF7UL, 0x6FF8130126B29721UL, 0xE7DE9FEFD1ED44A3UL,
+	0xD992257615DFA08BUL, 0xBE42DC12F6F7853CUL, 0x7EB027AB7CECA7D8UL, 0xDEA83EAADA7D8D53UL,
+	0xD86902BD93CE25AAUL, 0xF908731AFD43F65AUL, 0xA5194A17DAEF5FC0UL, 0x6A21FD4C33664D97UL,
 	0x701541DB3198B435UL, 0x9B54CDEDBB0F1EEAUL, 0x72409751A163D09AUL, 0xE26F4791BF9D75F6UL
 };
 
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl
index 279b652..73ef908 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl
@@ -50,19 +50,19 @@ ulong8 SkeinEvenRound(ulong8 p, const ulong8 h, const ulong *t, const uint s, co
 {
 	SKEIN_INJECT_KEY(p, s, q);
 	ulong4 pv0 = p.even, pv1 = p.odd;
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(46, 36, 19, 37));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(33, 27, 14, 42));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(17, 49, 36, 39));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(44, 9, 54, 56));
 	return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5)));
 }
@@ -71,19 +71,19 @@ ulong8 SkeinOddRound(ulong8 p, const ulong8 h, const ulong *t, const uint s, con
 {
 	SKEIN_INJECT_KEY(p, s, q);
     ulong4 pv0 = p.even, pv1 = p.odd;
-    
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(39, 30, 34, 24));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(13, 50, 10, 17));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(25, 29, 39, 43));
 	pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0));
 	pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1));
-	
+
 	SkeinMix8(&pv0, &pv1, (ulong4)(8, 35, 56, 22));
 	return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5)));
 }
@@ -129,7 +129,7 @@ ulong8 Skein512Block(ulong8 p, ulong8 h, ulong h8, const ulong *t)
 		h.s7 = h8;
 		h8 = tmp;
 	}
-	
+
 	p += h;
 	p.s5 += t[0];
 	p.s6 += t[1];
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 685890b..d6acec9 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -31,7 +31,7 @@ namespace amd
 {
 
 class autoAdjust
-{    
+{
 public:
 
 	autoAdjust()
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 4353e3d..88431cc 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -73,7 +73,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::th
 
 extern "C"  {
 #ifdef WIN32
-__declspec(dllexport) 
+__declspec(dllexport)
 #endif
 std::vector<iBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, environment& env)
 {
@@ -122,7 +122,7 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 		win_exit();
 	}
 
-	// \ todo get device count and exit if no opencl device 
+	// \ todo get device count and exit if no opencl device
 
 	if(!init_gpus())
 	{
@@ -139,7 +139,7 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 		jconf::inst()->GetThreadConfig(i, cfg);
 
 		const std::string backendName = xmrstak::params::inst().openCLVendor;
-		
+
 		if(cfg.cpu_aff >= 0)
 		{
 #if defined(__APPLE__)
@@ -158,27 +158,6 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 	return pvThreads;
 }
 
-void minethd::switch_work(miner_work& pWork)
-{
-	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
-	// faster than threads can consume them. This should never happen in real life.
-	// Pool cant physically send jobs faster than every 250ms or so due to net latency.
-
-	while (globalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < globalStates::inst().iThreadCount)
-		std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-	globalStates::inst().oGlobalWork = pWork;
-	globalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst);
-	globalStates::inst().iGlobalJobNo++;
-}
-
-void minethd::consume_work()
-{
-	memcpy(&oWork, &globalStates::inst().oGlobalWork, sizeof(miner_work));
-	iJobNo++;
-	globalStates::inst().iConsumeCnt++;
-
-}
 
 void minethd::work_main()
 {
@@ -193,13 +172,11 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-	
+
 	// start with root algorithm and switch later if fork version is reached
 	auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
 
-	globalStates::inst().iConsumeCnt++;
-
 	uint8_t version = 0;
 	size_t lastPoolId = 0;
 
@@ -215,7 +192,7 @@ void minethd::work_main()
 			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
-			consume_work();
+			globalStates::inst().consume_work(oWork, iJobNo);
 			continue;
 		}
 
@@ -242,7 +219,7 @@ void minethd::work_main()
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint64_t target = oWork.iTarget;
-		
+
 		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo);
 
 		if(oWork.bNiceHash)
@@ -254,7 +231,11 @@ void minethd::work_main()
 			if((round_ctr++ & 0xF) == 0)
 			{
 				globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
+				// check if the job is still valid; there is a small possibility that the job was switched in the meantime
+				if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
+					break;
 			}
+
 
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));
@@ -285,7 +266,7 @@ void minethd::work_main()
 			std::this_thread::yield();
 		}
 
-		consume_work();
+		globalStates::inst().consume_work(oWork, iJobNo);
 	}
 }
 
diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp
index 29ddb74..3142117 100644
--- a/xmrstak/backend/amd/minethd.hpp
+++ b/xmrstak/backend/amd/minethd.hpp
@@ -20,7 +20,6 @@ class minethd  : public iBackend
 {
 public:
 
-	static void switch_work(miner_work& pWork);
 	static std::vector<iBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork);
 	static bool init_gpus();
 
@@ -30,11 +29,9 @@ private:
 	minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg);
 
 	void work_main();
-	void consume_work();
 
 	uint64_t iJobNo;
-
-	static miner_work oGlobalWork;
+
 	miner_work oWork;
 
 	std::promise<void> order_fix;
diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp
index 6f80a0f..525413f 100644
--- a/xmrstak/backend/backendConnector.cpp
+++ b/xmrstak/backend/backendConnector.cpp
@@ -57,9 +57,6 @@ bool BackendConnector::self_test()
 
 std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
 {
-	globalStates::inst().iGlobalJobNo = 0;
-	globalStates::inst().iConsumeCnt = 0;
-
 
 	std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>;
 
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp
index 518721a..57dbef0 100644
--- a/xmrstak/backend/cpu/autoAdjust.hpp
+++ b/xmrstak/backend/cpu/autoAdjust.hpp
@@ -52,7 +52,7 @@ public:
 
 		std::string conf;
 
-		
+
 		if(!detectL3Size() || L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048u))
 		{
 			if(L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048))
@@ -127,7 +127,7 @@ private:
 
 			if(get_masked(cpu_info[0], 7, 5) != 3)
 			{
-				printer::inst()->print_msg(L0, "Autoconf failed: Couln't find L3 cache page.");
+				printer::inst()->print_msg(L0, "Autoconf failed: Couldn't find L3 cache page.");
 				return false;
 			}
 
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
index b1f3914..01d2280 100644
--- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -37,7 +37,7 @@ public:
 
 	bool printConfig()
 	{
-		
+
 		hwloc_topology_t topology;
 		hwloc_topology_init(&topology);
 		hwloc_topology_load(topology);
@@ -64,8 +64,8 @@ public:
 				throw(std::runtime_error("The CPU doesn't seem to have a cache."));
 
 			for(hwloc_obj_t obj : tlcs)
-				proccessTopLevelCache(obj);
-			
+				processTopLevelCache(obj);
+
 			for(uint32_t id : results)
 			{
 				conf += std::string("    { \"low_power_mode\" : ");
@@ -138,7 +138,7 @@ private:
 
 	// Top level cache isn't shared with other cores on the same package
 	// This will usually be 1 x L3, but can be 2 x L2 per package
-	void proccessTopLevelCache(hwloc_obj_t obj)
+	void processTopLevelCache(hwloc_obj_t obj)
 	{
 		if(obj->attr == nullptr)
 			throw(std::runtime_error("Cache object hasn't got attributes."));
@@ -158,7 +158,7 @@ private:
 
 			//Try our luck with lower level caches
 			for(size_t i=0; i < obj->arity; i++)
-				proccessTopLevelCache(obj->children[i]);
+				processTopLevelCache(obj->children[i]);
 			return;
 		}
 
diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl
index cb4b950..2fc9a47 100644
--- a/xmrstak/backend/cpu/config.tpl
+++ b/xmrstak/backend/cpu/config.tpl
@@ -2,25 +2,25 @@ R"===(
 /*
  * Thread configuration for each thread. Make sure it matches the number above.
  * low_power_mode - This can either be a boolean (true or false), or a number between 1 to 5. When set to true,
- *                  this mode will double the cache usage, and double the single thread performance. It will 
- *                  consume much less power (as less cores are working), but will max out at around 80-85% of 
+ *                  this mode will double the cache usage, and double the single thread performance. It will
+ *                  consume much less power (as fewer cores are working), but will max out at around 80-85% of
  *                  the maximum performance. When set to a number N greater than 1, this mode will increase the
  *                  cache usage and single thread performance by N times.
  *
- * no_prefetch -    Some sytems can gain up to extra 5% here, but sometimes it will have no difference or make
+ * no_prefetch -    Some systems can gain up to an extra 5% here, but sometimes it will make no difference or make
  *                  things slower.
  *
- * affine_to_cpu -  This can be either false (no affinity), or the CPU core number. Note that on hyperthreading 
- *                  systems it is better to assign threads to physical cores. On Windows this usually means selecting 
- *                  even or odd numbered cpu numbers. For Linux it will be usually the lower CPU numbers, so for a 4 
+ * affine_to_cpu -  This can be either false (no affinity), or the CPU core number. Note that on hyperthreading
+ *                  systems it is better to assign threads to physical cores. On Windows this usually means selecting
+ *                  even or odd numbered cpu numbers. For Linux it will usually be the lower CPU numbers, so for a 4
  *                  physical core CPU you should select cpu numbers 0-3.
  *
  * On the first run the miner will look at your system and suggest a basic configuration that will work,
  * you can try to tweak it from there to get the best performance.
- * 
+ *
  * A filled out configuration should look like this:
  * "cpu_threads_conf" :
- * [ 
+ * [
  *      { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 },
  *      { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 },
  * ],
diff --git a/xmrstak/backend/cpu/crypto/c_groestl.c b/xmrstak/backend/cpu/crypto/c_groestl.c
index 1318d5a..5b3523e 100644
--- a/xmrstak/backend/cpu/crypto/c_groestl.c
+++ b/xmrstak/backend/cpu/crypto/c_groestl.c
@@ -4,7 +4,7 @@
  *
  *  This work is based on the implementation of
  *          Soeren S. Thomsen and Krystian Matusiewicz
- *          
+ *
  *
  */
 
@@ -22,7 +22,7 @@ const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
 #define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
 															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
 															v1 = temp_var;}
-  
+
 
 #define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
    tu = T[2*(uint32_t)x[4*c0+0]];			    \
@@ -161,11 +161,11 @@ static void F512(uint32_t *h, const uint32_t *m) {
 
 /* digest up to msglen bytes of input (full blocks only) */
 static void Transform(groestlHashState *ctx,
-	       const uint8_t *input, 
+	       const uint8_t *input,
 	       int msglen) {
 
   /* digest message, one block at a time */
-  for (; msglen >= SIZE512; 
+  for (; msglen >= SIZE512;
 	   msglen -= SIZE512, input += SIZE512) {
 	F512(ctx->chaining,(uint32_t*)input);
 
@@ -199,7 +199,7 @@ static void OutputTransformation(groestlHashState *ctx) {
 	RND512P((uint8_t*)y, temp, 0x00000009);
 	for (j = 0; j < 2*COLS512; j++) {
 	  ctx->chaining[j] ^= temp[j];
-	}									  
+	}
 }
 
 /* initialise context */
@@ -313,7 +313,7 @@ static void Final(groestlHashState* ctx,
 	ctx->block_counter2 >>= 8;
   }
   /* digest final padding block */
-  Transform(ctx, ctx->buffer, SIZE512); 
+  Transform(ctx, ctx->buffer, SIZE512);
   /* perform output transformation */
   OutputTransformation(ctx);
 
@@ -332,7 +332,7 @@ static void Final(groestlHashState* ctx,
 }
 
 /* hash bit sequence */
-void groestl(const BitSequence* data, 
+void groestl(const BitSequence* data,
 		DataLength databitlen,
 		BitSequence* hashval) {
 
diff --git a/xmrstak/backend/cpu/crypto/c_groestl.h b/xmrstak/backend/cpu/crypto/c_groestl.h
index 2b51339..47044b4 100644
--- a/xmrstak/backend/cpu/crypto/c_groestl.h
+++ b/xmrstak/backend/cpu/crypto/c_groestl.h
@@ -4,10 +4,10 @@
 #include "crypto_uint8.h"
 #include "crypto_uint32.h"
 #include "crypto_uint64.h"
-#include "crypto_hash.h" 
+#include "crypto_hash.h"
 
-typedef crypto_uint8 uint8_t; 
-typedef crypto_uint32 uint32_t; 
+typedef crypto_uint8 uint8_t;
+typedef crypto_uint32 uint32_t;
 typedef crypto_uint64 uint64_t;
 */
 #include <stdint.h>
@@ -42,7 +42,7 @@ typedef struct {
   BitSequence buffer[SIZE512];      /* data buffer */
   int buf_ptr;              /* data buffer pointer */
   int bits_in_last_byte;    /* no. of message bits in last byte of
-			       data buffer */
+                               data buffer */
 } groestlHashState;
 
 /*void Init(hashState*);
@@ -53,8 +53,8 @@ void groestl(const BitSequence*, DataLength, BitSequence*);
 
 /*
 int crypto_hash(unsigned char *out,
-		const unsigned char *in,
-		unsigned long long len);
+                const unsigned char *in,
+                unsigned long long len);
 */
 
 #endif /* __hash_h */
diff --git a/xmrstak/backend/cpu/crypto/c_jh.c b/xmrstak/backend/cpu/crypto/c_jh.c
index 9d685a0..0256a0f 100644
--- a/xmrstak/backend/cpu/crypto/c_jh.c
+++ b/xmrstak/backend/cpu/crypto/c_jh.c
@@ -234,7 +234,7 @@ static HashReturn Init(hashState *state, int hashbitlen)
 	  /*initialize the initial hash value of JH*/
 	  state->hashbitlen = hashbitlen;
 
-	  /*load the intital hash value into state*/
+	  /*load the initial hash value into state*/
 	  switch (hashbitlen)
 	  {
 			case 224: memcpy(state->x,JH224_H0,128); break;
diff --git a/xmrstak/backend/cpu/crypto/c_keccak.c b/xmrstak/backend/cpu/crypto/c_keccak.c
index eadb85b..63c1614 100644
--- a/xmrstak/backend/cpu/crypto/c_keccak.c
+++ b/xmrstak/backend/cpu/crypto/c_keccak.c
@@ -12,14 +12,14 @@
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 #endif
 
-const uint64_t keccakf_rndc[24] = 
+const uint64_t keccakf_rndc[24] =
 {
 	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
 	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
 	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
 	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-	0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 
+	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
 	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
 	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 };
@@ -130,7 +130,7 @@ void keccakf(uint64_t st[25], int rounds)
 		st[j + 2] ^= (~bc[3]) & bc[4];
 		st[j + 3] ^= (~bc[4]) & bc[0];
 		st[j + 4] ^= (~bc[0]) & bc[1];
-		
+
 		//  Iota
 		st[0] ^= keccakf_rndc[round];
 	}
@@ -147,7 +147,7 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
 
 	rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
 	rsizw = rsiz / 8;
-	
+
 	memset(st, 0, sizeof(st));
 
 	for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
@@ -155,7 +155,7 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
 			st[i] ^= ((uint64_t *) in)[i];
 		keccakf(st, KECCAK_ROUNDS);
 	}
-	
+
 	// last block and padding
 	memcpy(temp, in, inlen);
 	temp[inlen++] = 1;
@@ -173,4 +173,4 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
 void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
 {
 	keccak(in, inlen, md, sizeof(state_t));
-}
-\ No newline at end of file
+}
diff --git a/xmrstak/backend/cpu/crypto/c_skein.c b/xmrstak/backend/cpu/crypto/c_skein.c
index 2453713..e2d5442 100644
--- a/xmrstak/backend/cpu/crypto/c_skein.c
+++ b/xmrstak/backend/cpu/crypto/c_skein.c
@@ -5,7 +5,7 @@
 ** Source code author: Doug Whiting, 2008.
 **
 ** This algorithm and source code is released to the public domain.
-** 
+**
 ************************************************************************/
 
 #define  SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
@@ -96,12 +96,12 @@ static int  Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
 **   After an InitExt() call, just use Update/Final calls as with Init().
 **
 **   Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
-**          When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, 
+**          When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
 **              the results of InitExt() are identical to calling Init().
 **          The function Init() may be called once to "precompute" the IV for
 **              a given hashBitLen value, then by saving a copy of the context
 **              the IV computation may be avoided in later calls.
-**          Similarly, the function InitExt() may be called once per MAC key 
+**          Similarly, the function InitExt() may be called once per MAC key
 **              to precompute the MAC IV, then a copy of the context saved and
 **              reused for each new MAC computation.
 **/
@@ -135,7 +135,7 @@ static int  Skein1024_Output   (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
 
 /*****************************************************************
 ** "Internal" Skein definitions
-**    -- not needed for sequential hashing API, but will be 
+**    -- not needed for sequential hashing API, but will be
 **           helpful for other uses of Skein (e.g., tree hash mode).
 **    -- included here so that they can be shared between
 **           reference and optimized code.
@@ -257,11 +257,11 @@ static int  Skein1024_Output   (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
 #define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
 #define Skein_assert(x)
 #elif   defined(SKEIN_ASSERT)
-#include <assert.h>     
-#define Skein_Assert(x,retCode) assert(x) 
-#define Skein_assert(x)         assert(x) 
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x)         assert(x)
 #else
-#include <assert.h>     
+#include <assert.h>
 #define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /*  caller  error */
 #define Skein_assert(x)         assert(x)                     /* internal error */
 #endif
@@ -269,8 +269,8 @@ static int  Skein1024_Output   (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
 /*****************************************************************
 ** Skein block function constants (shared across Ref and Opt code)
 ******************************************************************/
-enum    
-{   
+enum
+{
   /* Skein_256 round rotation constants */
   R_256_0_0=14, R_256_0_1=16,
   R_256_1_0=52, R_256_1_1=57,
@@ -518,7 +518,7 @@ const u64b_t SKEIN1024_IV_1024[] =
 #define BLK_BITS        (WCNT*64)               /* some useful definitions for code here */
 #define KW_TWK_BASE     (0)
 #define KW_KEY_BASE     (3)
-#define ks              (kw + KW_KEY_BASE)                
+#define ks              (kw + KW_KEY_BASE)
 #define ts              (kw + KW_TWK_BASE)
 
 #ifdef SKEIN_DEBUG
@@ -567,7 +567,7 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s
 		ts[0] += byteCntAdd;                    /* update processed length */
 
 		/* precompute the key schedule for this block */
-		ks[0] = ctx->X[0];     
+		ks[0] = ctx->X[0];
 		ks[1] = ctx->X[1];
 		ks[2] = ctx->X[2];
 		ks[3] = ctx->X[3];
@@ -594,7 +594,7 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
 	X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
 
-#if SKEIN_UNROLL_256 == 0                       
+#if SKEIN_UNROLL_256 == 0
 #define R256(p0,p1,p2,p3,ROT,rNum)           /* fully unrolled */   \
 	Round256(p0,p1,p2,p3,ROT,rNum)                                  \
 	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
@@ -620,8 +620,8 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
 
 	for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256)  /* loop thru it */
-#endif  
-		{    
+#endif
+		{
 #define R256_8_rounds(R)                  \
 		R256(0,1,2,3,R_256_0,8*(R) + 1);  \
 		R256(0,3,2,1,R_256_1,8*(R) + 2);  \
@@ -762,7 +762,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
 		ks[5] = ctx->X[5];
 		ks[6] = ctx->X[6];
 		ks[7] = ctx->X[7];
-		ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ 
+		ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
 				ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
 
 		ts[2] = ts[0] ^ ts[1];
@@ -790,7 +790,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
 	X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
 
-#if SKEIN_UNROLL_512 == 0                       
+#if SKEIN_UNROLL_512 == 0
 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
 	Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
 	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
@@ -1022,7 +1022,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC;   \
 	X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE;   \
 
-#if SKEIN_UNROLL_1024 == 0                      
+#if SKEIN_UNROLL_1024 == 0
 #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
 	Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
 	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
@@ -1044,7 +1044,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	X13   += ks[((R)+14) % 17] + ts[((R)+1) % 3];                     \
 	X14   += ks[((R)+15) % 17] + ts[((R)+2) % 3];                     \
 	X15   += ks[((R)+16) % 17] +     (R)+1;                           \
-	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); 
+	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
 #else                                       /* looping version */
 #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
 	Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
@@ -1072,7 +1072,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s
 	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
 
 	for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024)    /* loop thru it */
-#endif  
+#endif
 		{
 #define R1024_8_rounds(R)    /* do 8 full rounds */                               \
 		R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
@@ -1156,7 +1156,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s
 		ctx->X[15] = X15 ^ w[15];
 
 		Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
-		
+
 		ts[1] &= ~SKEIN_T1_FLAG_FIRST;
 		blkPtr += SKEIN1024_BLOCK_BYTES;
 		}
@@ -1193,7 +1193,7 @@ static int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
 		u08b_t  b[SKEIN_256_STATE_BYTES];
 		u64b_t  w[SKEIN_256_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
 
@@ -1237,13 +1237,13 @@ static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 		u08b_t  b[SKEIN_256_STATE_BYTES];
 		u64b_t  w[SKEIN_256_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
 
 	/* compute the initial chaining values ctx->X[], based on key */
 	if (keyBytes == 0)                          /* is there a key? */
-		{                                   
+		{
 		memset(ctx->X,0,sizeof(ctx->X));        /* no key: use all zeroes as key for config block */
 		}
 	else                                        /* here to pre-process a key */
@@ -1282,7 +1282,7 @@ static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 	/* Set up to process the data message portion of the hash (default) */
 	ctx->h.bCnt = 0;                            /* buffer b[] starts out empty */
 	Skein_Start_New_Type(ctx,MSG);
-	
+
 	return SKEIN_SUCCESS;
 	}
 #endif
@@ -1334,7 +1334,7 @@ static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msg
 
 	return SKEIN_SUCCESS;
 	}
-   
+
 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize the hash computation and output the result */
 static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
@@ -1348,7 +1348,7 @@ static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
 
 	Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-	
+
 	/* now output the result */
 	byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
 
@@ -1391,7 +1391,7 @@ static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
 		u08b_t  b[SKEIN_512_STATE_BYTES];
 		u64b_t  w[SKEIN_512_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
 
@@ -1437,13 +1437,13 @@ static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 		u08b_t  b[SKEIN_512_STATE_BYTES];
 		u64b_t  w[SKEIN_512_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
 
 	/* compute the initial chaining values ctx->X[], based on key */
 	if (keyBytes == 0)                          /* is there a key? */
-		{                                   
+		{
 		memset(ctx->X,0,sizeof(ctx->X));        /* no key: use all zeroes as key for config block */
 		}
 	else                                        /* here to pre-process a key */
@@ -1482,7 +1482,7 @@ static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 	/* Set up to process the data message portion of the hash (default) */
 	ctx->h.bCnt = 0;                            /* buffer b[] starts out empty */
 	Skein_Start_New_Type(ctx,MSG);
-	
+
 	return SKEIN_SUCCESS;
 	}
 #endif
@@ -1534,7 +1534,7 @@ static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msg
 
 	return SKEIN_SUCCESS;
 	}
-   
+
 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize the hash computation and output the result */
 static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
@@ -1548,7 +1548,7 @@ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
 
 	Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-	
+
 	/* now output the result */
 	byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
 
@@ -1590,7 +1590,7 @@ static int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
 		u08b_t  b[SKEIN1024_STATE_BYTES];
 		u64b_t  w[SKEIN1024_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
 
@@ -1635,13 +1635,13 @@ static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 		u08b_t  b[SKEIN1024_STATE_BYTES];
 		u64b_t  w[SKEIN1024_STATE_WORDS];
 		} cfg;                              /* config block */
-		
+
 	Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
 	Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
 
 	/* compute the initial chaining values ctx->X[], based on key */
 	if (keyBytes == 0)                          /* is there a key? */
-		{                                   
+		{
 		memset(ctx->X,0,sizeof(ctx->X));        /* no key: use all zeroes as key for config block */
 		}
 	else                                        /* here to pre-process a key */
@@ -1680,7 +1680,7 @@ static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree
 	/* Set up to process the data message portion of the hash (default) */
 	ctx->h.bCnt = 0;                            /* buffer b[] starts out empty */
 	Skein_Start_New_Type(ctx,MSG);
-	
+
 	return SKEIN_SUCCESS;
 	}
 #endif
@@ -1732,7 +1732,7 @@ static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msg
 
 	return SKEIN_SUCCESS;
 	}
-   
+
 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize the hash computation and output the result */
 static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
@@ -1746,7 +1746,7 @@ static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
 
 	Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-	
+
 	/* now output the result */
 	byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
 
@@ -1790,9 +1790,9 @@ static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
 	if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)   /* zero pad b[] if necessary */
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
 	Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);    /* process the final block */
-	
+
 	Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES);   /* "output" the state bytes */
-	
+
 	return SKEIN_SUCCESS;
 	}
 
@@ -1806,9 +1806,9 @@ static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
 	if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)   /* zero pad b[] if necessary */
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
 	Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);    /* process the final block */
-	
+
 	Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES);   /* "output" the state bytes */
-	
+
 	return SKEIN_SUCCESS;
 	}
 
@@ -1822,9 +1822,9 @@ static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
 	if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)   /* zero pad b[] if necessary */
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
 	Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);    /* process the final block */
-	
+
 	Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES);   /* "output" the state bytes */
-	
+
 	return SKEIN_SUCCESS;
 	}
 
diff --git a/xmrstak/backend/cpu/crypto/c_skein.h b/xmrstak/backend/cpu/crypto/c_skein.h
index 86dbc08..1aa11de 100644
--- a/xmrstak/backend/cpu/crypto/c_skein.h
+++ b/xmrstak/backend/cpu/crypto/c_skein.h
@@ -9,7 +9,7 @@
 ** This algorithm and source code is released to the public domain.
 **
 ***************************************************************************
-** 
+**
 ** The following compile-time switches may be defined to control some
 ** tradeoffs between speed, code size, error checking, and security.
 **
@@ -20,8 +20,8 @@
 **                            [default: no callouts (no overhead)]
 **
 **  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
-**                            code. If not defined, most error checking 
-**                            is disabled (for performance). Otherwise, 
+**                            code. If not defined, most error checking
+**                            is disabled (for performance). Otherwise,
 **                            the switch value is interpreted as:
 **                                0: use assert()      to flag errors
 **                                1: return SKEIN_FAIL to flag errors
@@ -42,6 +42,6 @@ typedef u08b_t   SkeinBitSequence;               /* bit stream type */
 
 /* "all-in-one" call */
 SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence *data,
-		SkeinDataLength databitlen, SkeinBitSequence *hashval);
+                           SkeinDataLength databitlen, SkeinBitSequence *hashval);
 
 #endif  /* ifndef _SKEIN_H_ */
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 7562de1..e15c474 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -28,7 +28,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
 	*hi = r >> 64;
 	return (uint64_t)r;
 }
-#define _mm256_set_m128i(v0, v1)  _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
+
 #else
 #include <intrin.h>
 #endif // __GNUC__
@@ -422,6 +422,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 	_mm_store_si128(output + 11, xout7);
 }
 
+template<xmrstak_algo ALGO>
 inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
 {
 	mem_out[0] = _mm_cvtsi128_si64(tmp);
@@ -431,10 +432,21 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
 
 	uint8_t x = static_cast<uint8_t>(vh >> 24);
 	static const uint16_t table = 0x7531;
-	const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
-	vh ^= ((table >> index) & 0x3) << 28;
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc)
+	{
+		const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+		vh ^= ((table >> index) & 0x3) << 28;
+
+		mem_out[1] = vh;
+	}
+	else if(ALGO == cryptonight_stellite)
+	{
+		const uint8_t index = (((x >> 4) & 6) | (x & 1)) << 1;
+		vh ^= ((table >> index) & 0x3) << 28;
+
+		mem_out[1] = vh;
+	}
 
-	mem_out[1] = vh;
 }
 
 template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
@@ -444,7 +456,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
 	constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
 	{
 		memset(output, 0, 32);
 		return;
@@ -453,7 +465,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 	keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
 
 	uint64_t monero_const;
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 	{
 		monero_const  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
 		monero_const ^=  *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24);
@@ -482,8 +494,8 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+			cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
 		else
 			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
 
@@ -506,8 +518,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 			_mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0);
 		ah0 += lo;
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+		{
+			if(ALGO == cryptonight_ipbc)
+				((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0];
+			else
+				((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+		}
 		else
 			((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
 		ah0 ^= ch;
@@ -544,7 +561,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
 	constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
 	{
 		memset(output, 0, 64);
 		return;
@@ -554,7 +571,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200);
 
 	uint64_t monero_const_0, monero_const_1;
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 	{
 		monero_const_0  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
 		monero_const_0 ^=  *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24);
@@ -592,8 +609,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+			cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
 		else
 			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
 
@@ -610,8 +627,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+			cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
 		else
 			_mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
 
@@ -631,8 +648,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		axh0 += lo;
 		((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+		{
+			if(ALGO == cryptonight_ipbc)
+				((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0];
+			else
+				((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+		}
 		else
 			((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
 
@@ -662,8 +684,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		axh1 += lo;
 		((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
 
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
-			((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+		{
+			if(ALGO == cryptonight_ipbc)
+				((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0];
+			else
+				((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+		}
 		else
 			((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
 
@@ -701,7 +728,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	ptr = (__m128i *)&l[idx & MASK];			\
 	if(PREFETCH)						\
 		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
-	c = _mm_load_si128(ptr); 
+	c = _mm_load_si128(ptr);
 
 #define CN_STEP2(a, b, c, l, ptr, idx)				\
 	if(SOFT_AES)						\
@@ -709,8 +736,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	else							\
 		c = _mm_aesenc_si128(c, a);			\
 	b = _mm_xor_si128(b, c);				\
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \
-		cryptonight_monero_tweak((uint64_t*)ptr, b); \
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \
+		cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \
 	else \
 		_mm_store_si128(ptr, b);\
 
@@ -724,8 +751,12 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 #define CN_STEP4(a, b, c, l, mc, ptr, idx)				\
 	lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi);		\
 	a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi));		\
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \
+	{ \
 		_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
+		if (ALGO == cryptonight_ipbc) \
+			((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\
+	} \
 	else \
 		_mm_store_si128(ptr, a);\
 	a = _mm_xor_si128(a, b); \
@@ -751,7 +782,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
 	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
 	constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
 	{
 		memset(output, 0, 32 * 3);
 		return;
@@ -845,7 +876,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
 	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
 	constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
 	{
 		memset(output, 0, 32 * 4);
 		return;
@@ -883,13 +914,13 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
 	__m128i cx1 = _mm_set_epi64x(0, 0);
 	__m128i cx2 = _mm_set_epi64x(0, 0);
 	__m128i cx3 = _mm_set_epi64x(0, 0);
-	
+
 	uint64_t idx0, idx1, idx2, idx3;
 	idx0 = _mm_cvtsi128_si64(ax0);
 	idx1 = _mm_cvtsi128_si64(ax1);
 	idx2 = _mm_cvtsi128_si64(ax2);
 	idx3 = _mm_cvtsi128_si64(ax3);
-	
+
 	for (size_t i = 0; i < ITERATIONS/2; i++)
 	{
 		uint64_t hi, lo;
@@ -954,7 +985,7 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
 	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
 	constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
 	{
 		memset(output, 0, 32 * 5);
 		return;
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
index ee3b663..a478c9b 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
+++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
@@ -118,7 +118,7 @@ BOOL AddLargePageRights()
 
 		DWORD size = 0;
 		GetTokenInformation(hToken, TokenUser, NULL, 0, &size);
-		
+
 		if (size > 0 && bIsElevated)
 		{
 			user = (PTOKEN_USER)LocalAlloc(LPTR, size);
@@ -136,7 +136,7 @@ BOOL AddLargePageRights()
 	ZeroMemory(&attributes, sizeof(attributes));
 
 	BOOL result = FALSE;
-	if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) 
+	if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0)
 	{
 		LSA_UNICODE_STRING lockmem;
 		lockmem.Buffer = L"SeLockMemoryPrivilege";
diff --git a/xmrstak/backend/cpu/crypto/soft_aes.hpp b/xmrstak/backend/cpu/crypto/soft_aes.hpp
index d3f4637..9b4ae0a 100644
--- a/xmrstak/backend/cpu/crypto/soft_aes.hpp
+++ b/xmrstak/backend/cpu/crypto/soft_aes.hpp
@@ -104,9 +104,9 @@ static inline __m128i soft_aesenc(__m128i in, __m128i key)
 
 static inline uint32_t sub_word(uint32_t key)
 {
-	return (saes_sbox[key >> 24 ] << 24)   | 
-		(saes_sbox[(key >> 16) & 0xff] << 16 ) | 
-		(saes_sbox[(key >> 8)  & 0xff] << 8  ) | 
+	return (saes_sbox[key >> 24 ] << 24)   |
+		(saes_sbox[(key >> 16) & 0xff] << 16 ) |
+		(saes_sbox[(key >> 8)  & 0xff] << 8  ) |
 		 saes_sbox[key & 0xff];
 }
 
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index f8f70f9..482c085 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -95,6 +95,7 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id
 	return pthread_setaffinity_np(h, sizeof(cpuset_t), &mn) == 0;
 #elif defined(__OpenBSD__)
         printer::inst()->print_msg(L0,"WARNING: thread pinning is not supported under OPENBSD.");
+        return true;
 #else
 	cpu_set_t mn;
 	CPU_ZERO(&mn);
@@ -285,7 +286,12 @@ bool minethd::self_test()
 	else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon)
 	{
 	}
-
+	else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_ipbc)
+	{
+	}
+	else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_stellite)
+	{
+	}
 	for (int i = 0; i < MAX_N; i++)
 		cryptonight_free_ctx(ctx[i]);
 
@@ -333,7 +339,7 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 		}
 		else
 			printer::inst()->print_msg(L1, "Starting %dx thread, no affinity.", cfg.iMultiway);
-		
+
 		minethd* thd = new minethd(pWork, i + threadOffset, cfg.iMultiway, cfg.bNoPrefetch, cfg.iCpuAff);
 		pvThreads.push_back(thd);
 	}
@@ -341,13 +347,6 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 	return pvThreads;
 }
 
-void minethd::consume_work()
-{
-	memcpy(&oWork, &globalStates::inst().inst().oGlobalWork, sizeof(miner_work));
-	iJobNo++;
-	globalStates::inst().inst().iConsumeCnt++;
-}
-
 minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
 {
 	// We have two independent flag bits in the functions
@@ -372,6 +371,12 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr
 	case cryptonight_aeon:
 		algv = 4;
 		break;
+	case cryptonight_ipbc:
+		algv = 5;
+		break;
+	case cryptonight_stellite:
+		algv = 6;
+		break;
 	default:
 		algv = 2;
 		break;
@@ -397,7 +402,15 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr
 		cryptonight_hash<cryptonight_aeon, false, false>,
 		cryptonight_hash<cryptonight_aeon, true, false>,
 		cryptonight_hash<cryptonight_aeon, false, true>,
-		cryptonight_hash<cryptonight_aeon, true, true>
+		cryptonight_hash<cryptonight_aeon, true, true>,
+		cryptonight_hash<cryptonight_ipbc, false, false>,
+		cryptonight_hash<cryptonight_ipbc, true, false>,
+		cryptonight_hash<cryptonight_ipbc, false, true>,
+		cryptonight_hash<cryptonight_ipbc, true, true>,
+		cryptonight_hash<cryptonight_stellite, false, false>,
+		cryptonight_hash<cryptonight_stellite, true, false>,
+		cryptonight_hash<cryptonight_stellite, false, true>,
+		cryptonight_hash<cryptonight_stellite, true, true>
 	};
 
 	std::bitset<2> digit;
@@ -430,7 +443,6 @@ void minethd::work_main()
 
 	piHashVal = (uint64_t*)(result.bResult + 24);
 	piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
-	globalStates::inst().inst().iConsumeCnt++;
 	result.iThreadId = iThreadNo;
 
 	uint8_t version = 0;
@@ -448,7 +460,7 @@ void minethd::work_main()
 			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
-			consume_work();
+			globalStates::inst().consume_work(oWork, iJobNo);
 			continue;
 		}
 
@@ -491,6 +503,9 @@ void minethd::work_main()
 			if((nonce_ctr++ & (nonce_chunk-1)) == 0)
 			{
 				globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
+				// check if the job is still valid, there is a small possibility that the job was switched
+				if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
+					break;
 			}
 
 			*piNonce = result.iNonce;
@@ -504,7 +519,7 @@ void minethd::work_main()
 			std::this_thread::yield();
 		}
 
-		consume_work();
+		globalStates::inst().consume_work(oWork, iJobNo);
 	}
 
 	cryptonight_free_ctx(ctx);
@@ -534,6 +549,12 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes,
 	case cryptonight_aeon:
 		algv = 4;
 		break;
+	case cryptonight_ipbc:
+		algv = 5;
+		break;
+	case cryptonight_stellite:
+		algv = 6;
+		break;
 	default:
 		algv = 2;
 		break;
@@ -573,7 +594,7 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes,
 		cryptonight_penta_hash<cryptonight_lite, true, false>,
 		cryptonight_penta_hash<cryptonight_lite, false, true>,
 		cryptonight_penta_hash<cryptonight_lite, true, true>,
-		
+
 		cryptonight_double_hash<cryptonight, false, false>,
 		cryptonight_double_hash<cryptonight, true, false>,
 		cryptonight_double_hash<cryptonight, false, true>,
@@ -623,13 +644,47 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes,
 		cryptonight_penta_hash<cryptonight_aeon, false, false>,
 		cryptonight_penta_hash<cryptonight_aeon, true, false>,
 		cryptonight_penta_hash<cryptonight_aeon, false, true>,
-		cryptonight_penta_hash<cryptonight_aeon, true, true>
+		cryptonight_penta_hash<cryptonight_aeon, true, true>,
+
+		cryptonight_double_hash<cryptonight_ipbc, false, false>,
+		cryptonight_double_hash<cryptonight_ipbc, true, false>,
+		cryptonight_double_hash<cryptonight_ipbc, false, true>,
+		cryptonight_double_hash<cryptonight_ipbc, true, true>,
+		cryptonight_triple_hash<cryptonight_ipbc, false, false>,
+		cryptonight_triple_hash<cryptonight_ipbc, true, false>,
+		cryptonight_triple_hash<cryptonight_ipbc, false, true>,
+		cryptonight_triple_hash<cryptonight_ipbc, true, true>,
+		cryptonight_quad_hash<cryptonight_ipbc, false, false>,
+		cryptonight_quad_hash<cryptonight_ipbc, true, false>,
+		cryptonight_quad_hash<cryptonight_ipbc, false, true>,
+		cryptonight_quad_hash<cryptonight_ipbc, true, true>,
+		cryptonight_penta_hash<cryptonight_ipbc, false, false>,
+		cryptonight_penta_hash<cryptonight_ipbc, true, false>,
+		cryptonight_penta_hash<cryptonight_ipbc, false, true>,
+		cryptonight_penta_hash<cryptonight_ipbc, true, true>,
+
+		cryptonight_double_hash<cryptonight_stellite, false, false>,
+		cryptonight_double_hash<cryptonight_stellite, true, false>,
+		cryptonight_double_hash<cryptonight_stellite, false, true>,
+		cryptonight_double_hash<cryptonight_stellite, true, true>,
+		cryptonight_triple_hash<cryptonight_stellite, false, false>,
+		cryptonight_triple_hash<cryptonight_stellite, true, false>,
+		cryptonight_triple_hash<cryptonight_stellite, false, true>,
+		cryptonight_triple_hash<cryptonight_stellite, true, true>,
+		cryptonight_quad_hash<cryptonight_stellite, false, false>,
+		cryptonight_quad_hash<cryptonight_stellite, true, false>,
+		cryptonight_quad_hash<cryptonight_stellite, false, true>,
+		cryptonight_quad_hash<cryptonight_stellite, true, true>,
+		cryptonight_penta_hash<cryptonight_stellite, false, false>,
+		cryptonight_penta_hash<cryptonight_stellite, true, false>,
+		cryptonight_penta_hash<cryptonight_stellite, false, true>,
+		cryptonight_penta_hash<cryptonight_stellite, true, true>,
 	};
 
 	std::bitset<2> digit;
 	digit.set(0, !bHaveAes);
 	digit.set(1, !bNoPrefetch);
-	
+
 	return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()];
 }
 
@@ -713,7 +768,7 @@ void minethd::multiway_work_main()
 			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
-			consume_work();
+			globalStates::inst().consume_work(oWork, iJobNo);
 			prep_multiway_work<N>(bWorkBlob, piNonce);
 			continue;
 		}
@@ -758,6 +813,9 @@ void minethd::multiway_work_main()
 			{
 				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk);
 				nonce_ctr = nonce_chunk;
+				// check if the job is still valid, there is a small possibility that the job was switched
+				if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
+					break;
 			}
 
 			for (size_t i = 0; i < N; i++)
@@ -776,7 +834,7 @@ void minethd::multiway_work_main()
 			std::this_thread::yield();
 		}
 
-		consume_work();
+		globalStates::inst().consume_work(oWork, iJobNo);
 		prep_multiway_work<N>(bWorkBlob, piNonce);
 	}
 
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 85a95d1..2d40ce3 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -47,11 +47,8 @@ private:
 	void quad_work_main();
 	void penta_work_main();
 
-	void consume_work();
-
 	uint64_t iJobNo;
 
-	static miner_work oGlobalWork;
 	miner_work oWork;
 
 	std::promise<void> order_fix;
diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp
index 8a8e259..633ddf4 100644
--- a/xmrstak/backend/cryptonight.hpp
+++ b/xmrstak/backend/cryptonight.hpp
@@ -3,14 +3,16 @@
 #include <inttypes.h>
 #include <type_traits>
 
-enum xmrstak_algo 
+enum xmrstak_algo
 {
 	invalid_algo = 0,
 	cryptonight = 1,
 	cryptonight_lite = 2,
 	cryptonight_monero = 3,
 	cryptonight_heavy = 4,
-	cryptonight_aeon = 5
+	cryptonight_aeon = 5,
+	cryptonight_ipbc = 6, // equal to cryptonight_aeon with a small tweak in the miner code
+	cryptonight_stellite = 7 //equal to cryptonight_monero but with one tiny change
 };
 
 // define aeon settings
@@ -44,14 +46,22 @@ inline constexpr size_t cn_select_memory<cryptonight_heavy>() { return CRYPTONIG
 template<>
 inline constexpr size_t cn_select_memory<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MEMORY; }
 
+template<>
+inline constexpr size_t cn_select_memory<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MEMORY; }
+
+template<>
+inline constexpr size_t cn_select_memory<cryptonight_stellite>() { return CRYPTONIGHT_MEMORY; }
+
 
 inline size_t cn_select_memory(xmrstak_algo algo)
 {
 	switch(algo)
 	{
+	case cryptonight_stellite:
 	case cryptonight_monero:
 	case cryptonight:
-		return CRYPTONIGHT_MEMORY;	
+		return CRYPTONIGHT_MEMORY;
+	case cryptonight_ipbc:
 	case cryptonight_aeon:
 	case cryptonight_lite:
 		return CRYPTONIGHT_LITE_MEMORY;
@@ -80,13 +90,21 @@ inline constexpr uint32_t cn_select_mask<cryptonight_heavy>() { return CRYPTONIG
 template<>
 inline constexpr uint32_t cn_select_mask<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MASK; }
 
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MASK; }
+
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight_stellite>() { return CRYPTONIGHT_MASK; }
+
 inline size_t cn_select_mask(xmrstak_algo algo)
 {
 	switch(algo)
 	{
+	case cryptonight_stellite:
 	case cryptonight_monero:
 	case cryptonight:
 		return CRYPTONIGHT_MASK;
+	case cryptonight_ipbc:
 	case cryptonight_aeon:
 	case cryptonight_lite:
 		return CRYPTONIGHT_LITE_MASK;
@@ -115,13 +133,21 @@ inline constexpr uint32_t cn_select_iter<cryptonight_heavy>() { return CRYPTONIG
 template<>
 inline constexpr uint32_t cn_select_iter<cryptonight_aeon>() { return CRYPTONIGHT_LITE_ITER; }
 
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_ITER; }
+
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight_stellite>() { return CRYPTONIGHT_ITER; }
+
 inline size_t cn_select_iter(xmrstak_algo algo)
 {
 	switch(algo)
 	{
+	case cryptonight_stellite:
 	case cryptonight_monero:
 	case cryptonight:
 		return CRYPTONIGHT_ITER;
+	case cryptonight_ipbc:
 	case cryptonight_aeon:
 	case cryptonight_lite:
 		return CRYPTONIGHT_LITE_ITER;
diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 1ec7983..3bd7d0e 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -33,24 +33,37 @@
 namespace xmrstak
 {
 
+void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
+{
+	jobLock.ReadLock();
+
+	threadWork = oGlobalWork;
+	currentJobId = iGlobalJobNo.load(std::memory_order_relaxed);
+	
+	jobLock.UnLock();
+}
 
 void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
-	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
-	// faster than threads can consume them. This should never happen in real life.
-	// Pool cant physically send jobs faster than every 250ms or so due to net latency.
-
-	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
-		std::this_thread::sleep_for(std::chrono::milliseconds(100));
+	jobLock.WriteLock();
 
+	/* This notifies all threads that the job has changed.
+	 * To avoid duplicated shares this must be done before the nonce is exchanged.
+	 */
+	iGlobalJobNo++;
+	
 	size_t xid = dat.pool_id;
 	dat.pool_id = pool_id;
 	pool_id = xid;
 
-	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
+	/* Maybe a worker thread is updating the nonce while we read it.
+	 * To avoid duplicated share calculations the job ID is checked in the worker thread
+	 * after the nonce is read.
+	 */
+	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
 	oGlobalWork = pWork;
-	iConsumeCnt.store(0, std::memory_order_seq_cst);
-	iGlobalJobNo++;
+	
+	jobLock.UnLock();
 }
 
 } // namespace xmrstak
diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp
index fafd232..c8d6917 100644
--- a/xmrstak/backend/globalStates.hpp
+++ b/xmrstak/backend/globalStates.hpp
@@ -1,26 +1,16 @@
 #pragma once
 
-#include "miner_work.hpp"
+#include "xmrstak/backend/miner_work.hpp"
 #include "xmrstak/misc/environment.hpp"
 #include "xmrstak/misc/console.hpp"
+#include "xmrstak/backend/pool_data.hpp"
+#include "xmrstak/cpputil/read_write_lock.h"
 
 #include <atomic>
 
-constexpr static size_t invalid_pool_id = (-1);
-
 namespace xmrstak
 {
 
-struct pool_data
-{
-	uint32_t iSavedNonce;
-	size_t   pool_id;
-
-	pool_data() : iSavedNonce(0), pool_id(invalid_pool_id)
-	{
-	}
-};
-
 struct globalStates
 {
 	static inline globalStates& inst()
@@ -42,6 +32,8 @@ struct globalStates
 			nonce = iGlobalNonce.fetch_add(reserve_count);
 	}
 
+	void consume_work( miner_work& threadWork, uint64_t& currentJobId);
+
 	miner_work oGlobalWork;
 	std::atomic<uint64_t> iGlobalJobNo;
 	std::atomic<uint64_t> iConsumeCnt;
@@ -50,9 +42,11 @@ struct globalStates
 	size_t pool_id = invalid_pool_id;
 
 private:
-	globalStates() : iThreadCount(0)
+	globalStates() : iThreadCount(0), iGlobalJobNo(0), iConsumeCnt(0)
 	{
 	}
+
+	::cpputil::RWLock jobLock;
 };
 
 } // namespace xmrstak
diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp
index fdc647e..18411b7 100644
--- a/xmrstak/backend/iBackend.hpp
+++ b/xmrstak/backend/iBackend.hpp
@@ -20,7 +20,7 @@ namespace xmrstak
 	{
 
 		enum BackendType : uint32_t { UNKNOWN = 0u, CPU = 1u, AMD = 2u, NVIDIA = 3u };
-		
+
 		static const char* getName(const BackendType type)
 		{
 			const char* backendNames[] = {
diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp
index 438ec0d..b6456f0 100644
--- a/xmrstak/backend/miner_work.hpp
+++ b/xmrstak/backend/miner_work.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "xmrstak/backend/pool_data.hpp"
+
 #include <thread>
 #include <atomic>
 #include <mutex>
@@ -20,7 +22,7 @@ namespace xmrstak
 		bool        bStall;
 		size_t      iPoolId;
 
-		miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { }
+		miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(invalid_pool_id) { }
 
 		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize,
 			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize),
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index d8bb621..1246809 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -21,7 +21,7 @@ namespace nvidia
 {
 
 class autoAdjust
-{    
+{
 public:
 
 	autoAdjust()
@@ -42,7 +42,7 @@ public:
 		// evaluate config parameter for if auto adjustment is needed
 		for(int i = 0; i < deviceCount; i++)
 		{
-	   
+
 			nvid_ctx ctx;
 
 			ctx.device_id = i;
diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl
index f489956..2aa68dc 100644
--- a/xmrstak/backend/nvidia/config.tpl
+++ b/xmrstak/backend/nvidia/config.tpl
@@ -22,7 +22,7 @@ R"===(
  * A filled out configuration should look like this:
  * "gpu_threads_conf" :
  * [
- *     { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0, 
+ *     { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0,
  *       "affine_to_cpu" : false, "sync_mode" : 3,
  *     },
  * ],
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 92f5f78..16171e1 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -181,7 +181,7 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 		}
 		else
 			printer::inst()->print_msg(L1, "Starting NVIDIA GPU thread %d, no affinity.", i);
-		
+
 		minethd* thd = new minethd(pWork, i + threadOffset, cfg);
 		pvThreads->push_back(thd);
 
@@ -195,27 +195,6 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 	return pvThreads;
 }
 
-void minethd::switch_work(miner_work& pWork)
-{
-	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
-	// faster than threads can consume them. This should never happen in real life.
-	// Pool cant physically send jobs faster than every 250ms or so due to net latency.
-
-	while (globalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < globalStates::inst().iThreadCount)
-		std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-	globalStates::inst().oGlobalWork = pWork;
-	globalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst);
-	globalStates::inst().iGlobalJobNo++;
-}
-
-void minethd::consume_work()
-{
-	memcpy(&oWork, &globalStates::inst().oGlobalWork, sizeof(miner_work));
-	iJobNo++;
-	globalStates::inst().iConsumeCnt++;
-}
-
 void minethd::work_main()
 {
 	if(affinity >= 0) //-1 means no affinity
@@ -223,7 +202,7 @@ void minethd::work_main()
 
 	if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
 	{
-		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
+		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exiting.\n", (int)iThreadNo);
 		std::exit(0);
 	}
 
@@ -237,14 +216,12 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-	
+
 	// start with root algorithm and switch later if fork version is reached
 	auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
-	
-	uint32_t iNonce;
 
-	globalStates::inst().iConsumeCnt++;
+	uint32_t iNonce;
 
 	uint8_t version = 0;
 	size_t lastPoolId = 0;
@@ -261,7 +238,7 @@ void minethd::work_main()
 			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
-			consume_work();
+			globalStates::inst().consume_work(oWork, iJobNo);
 			continue;
 		}
 		uint8_t new_version = oWork.getVersion();
@@ -298,8 +275,11 @@ void minethd::work_main()
 			if((round_ctr++ & 0xF) == 0)
 			{
 				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, h_per_round * 16);
+				// check if the job is still valid, there is a small possibility that the job was switched
+				if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
+					break;
 			}
-			
+
 			uint32_t foundNonce[10];
 			uint32_t foundCount;
 
@@ -337,7 +317,7 @@ void minethd::work_main()
 			std::this_thread::yield();
 		}
 
-		consume_work();
+		globalStates::inst().consume_work(oWork, iJobNo);
 	}
 }
 
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index 89c2944..d4ae038 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -24,7 +24,6 @@ class minethd : public iBackend
 {
 public:
 
-	static void switch_work(miner_work& pWork);
 	static std::vector<iBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork);
 	static bool self_test();
 
@@ -33,16 +32,14 @@ private:
 
 	minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
 	void start_mining();
-	
+
 	void work_main();
-	void consume_work();
 
 	static std::atomic<uint64_t> iGlobalJobNo;
 	static std::atomic<uint64_t> iConsumeCnt;
 	static uint64_t iThreadCount;
 	uint64_t iJobNo;
 
-	static miner_work oGlobalWork;
 	miner_work oWork;
 
 	std::promise<void> numa_promise;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index c1e31b9..d588641 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -14,7 +14,7 @@ typedef struct {
 	int device_blocks;
 	int device_threads;
 	int device_bfactor;
-	int device_bsleep;	
+	int device_bsleep;
 	int syncMode;
 
 	uint32_t *d_input;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
index 340174c..611fe1c 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
@@ -95,7 +95,7 @@ __device__ void cn_blake_update(blake_state *  S, const uint8_t *  data, uint64_
 	uint32_t left = S->buflen >> 3;
 	uint32_t fill = 64 - left;
 
-	if (left && (((datalen >> 3) & 0x3F) >= fill)) 
+	if (left && (((datalen >> 3) & 0x3F) >= fill))
 	{
 		memcpy((void *) (S->buf + left), (void *) data, fill);
 		S->t[0] += 512;
@@ -106,7 +106,7 @@ __device__ void cn_blake_update(blake_state *  S, const uint8_t *  data, uint64_
 		left = 0;
 	}
 
-	while (datalen >= 512) 
+	while (datalen >= 512)
 	{
 		S->t[0] += 512;
 		if (S->t[0] == 0) S->t[1]++;
@@ -115,12 +115,12 @@ __device__ void cn_blake_update(blake_state *  S, const uint8_t *  data, uint64_
 		datalen -= 512;
 	}
 
-	if (datalen > 0) 
+	if (datalen > 0)
 	{
 		memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
 		S->buflen = (left << 3) + datalen;
 	}
-	else 
+	else
 	{
 		S->buflen = 0;
 	}
@@ -128,7 +128,7 @@ __device__ void cn_blake_update(blake_state *  S, const uint8_t *  data, uint64_
 
 __device__ void cn_blake_final(blake_state *  S, uint8_t *  digest)
 {
-	const uint8_t padding[] = 
+	const uint8_t padding[] =
 	{
 		0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -141,20 +141,20 @@ __device__ void cn_blake_final(blake_state *  S, uint8_t *  digest)
 	U32TO8(msglen + 0, hi);
 	U32TO8(msglen + 4, lo);
 
-	if (S->buflen == 440) 
+	if (S->buflen == 440)
 	{
 		S->t[0] -= 8;
 		cn_blake_update(S, &pa, 8);
-	} 
-	else 
+	}
+	else
 	{
-		if (S->buflen < 440) 
+		if (S->buflen < 440)
 		{
 			if (S->buflen == 0) S->nullt = 1;
 			S->t[0] -= 440 - S->buflen;
 			cn_blake_update(S, padding, 440 - S->buflen);
 		}
-		else 
+		else
 		{
 			S->t[0] -= 512 - S->buflen;
 			cn_blake_update(S, padding, 512 - S->buflen);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 43740d2..57b6ad0 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -170,7 +170,7 @@ __forceinline__ __device__ void unusedVar( const T& )
  * - for <sm_30 shared memory is needed
  *
  * group_n - must be a power of 2!
- * 
+ *
  * @param ptr pointer to shared memory, size must be `threadIdx.x * sizeof(uint32_t)`
  *            value can be NULL for compute architecture >=sm_30
  * @param sub thread number within the group, range [0:group_n]
@@ -187,7 +187,7 @@ __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_
     unusedVar( ptr );
     unusedVar( sub );
 #   if(__CUDACC_VER_MAJOR__ >= 9)
-    return __shfl_sync(0xFFFFFFFF, val, src, group_n );
+    return __shfl_sync(__activemask(), val, src, group_n );
 #	else
 	return __shfl( val, src, group_n );
 #	endif
@@ -231,7 +231,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 	uint32_t t1[2], t2[2], res;
 
 	uint32_t tweak1_2[2];
-	if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+	if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 	{
 		uint32_t * state = d_ctx_state + thread * 50;
 		tweak1_2[0] = (d_input[8] >> 24) | (d_input[9] << 8);
@@ -275,16 +275,25 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 			t1[0] = shuffle<4>(sPtr,sub, d[x], 0);
 
 			const uint32_t z = d[0] ^ d[1];
-			if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+			if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 			{
 				const uint32_t table = 0x75310U;
-				const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2);
-				const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24;
-				storeGlobal32( long_state + j, sub == 2 ? fork_7 : z );
+				if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc)
+				{
+					const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2);
+					const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24;
+					storeGlobal32( long_state + j, sub == 2 ? fork_7 : z );
+				}
+				else if(ALGO == cryptonight_stellite)
+				{
+					const uint32_t index = ((z >> 27) & 12) | ((z >> 23) & 2);
+					const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24;
+					storeGlobal32( long_state + j, sub == 2 ? fork_7 : z );
+				}
 			}
 			else
 				storeGlobal32( long_state + j, z );
-			
+
 			//MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & MASK]);
 			j = ( ( *t1 & MASK ) >> 2 ) + sub;
 
@@ -303,16 +312,22 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 
 			res = *( (uint64_t *) t2 )  >> ( sub & 1 ? 32 : 0 );
 
-			
-			if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+			if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 			{
 				const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res;
-				const uint32_t long_state_update = sub2 ? tweaked_res : res;
+				uint32_t long_state_update = sub2 ? tweaked_res : res;
+
+				if (ALGO == cryptonight_ipbc)
+				{
+					uint32_t value = shuffle<4>(sPtr,sub, long_state_update, sub & 1) ^ long_state_update;
+					long_state_update = sub >= 2 ? value : long_state_update;
+				}
+
 				storeGlobal32( long_state + j, long_state_update );
 			}
 			else
 				storeGlobal32( long_state + j, res );
-			
+
 			a = ( sub & 1 ? yy[1] : yy[0] ) ^ res;
 			idx0 = shuffle<4>(sPtr,sub, a, 0);
 			if(ALGO == cryptonight_heavy)
@@ -363,7 +378,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	MEMCPY8( text, d_ctx_state + thread * 50 + sub + 16, 2 );
 
 	__syncthreads( );
-	
+
 #if( __CUDA_ARCH__ < 300 )
 	extern __shared__ uint32_t shuffleMem[];
 	volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFF8));
@@ -378,7 +393,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 			text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
 
 		cn_aes_pseudo_round_mut( sharedMemory, text, key );
-		
+
 		if(ALGO == cryptonight_heavy)
 		{
 			#pragma unroll
@@ -415,7 +430,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 	{
 		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,MEMORY><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
 			bfactorOneThree, i,
-			ctx->d_long_state, 
+			ctx->d_long_state,
 			(ALGO == cryptonight_heavy ? ctx->d_ctx_state2 : ctx->d_ctx_state),
 			ctx->d_ctx_key1 ));
 
@@ -458,7 +473,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 
 	for ( int i = 0; i < roundsPhase3; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< 
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<<
 			grid,
 			block8,
 			block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
@@ -492,5 +507,13 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t
 	{
 		cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon>(ctx, startNonce);
 	}
+	else if(miner_algo == cryptonight_ipbc)
+	{
+		cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc>(ctx, startNonce);
+	}
+	else if(miner_algo == cryptonight_stellite)
+	{
+		cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite>(ctx, startNonce);
+	}
 
 }
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index f192f01..304997e 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -184,7 +184,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3
 	uint32_t * __restrict__ ctx_state = d_ctx_state + thread * 50;
 	uint64_t hash[4];
 	uint32_t state[50];
-	
+
 	#pragma unroll
 	for ( i = 0; i < 50; i++ )
 		state[i] = ctx_state[i];
@@ -296,7 +296,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 	}
 	else
 		ctx->d_ctx_state2 = ctx->d_ctx_state;
-	
+
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize));
@@ -472,7 +472,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
 		if(it == arch.end())
 		{
-			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for CUDA architecture %d.\n", ctx->device_id, gpuArch);
 			return 5;
 		}
 	}
@@ -490,7 +490,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 				minSupportedArch = arch[i];
 		if(minSupportedArch < 30 || gpuArch < minSupportedArch)
 		{
-			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for CUDA architecture %d.\n", ctx->device_id, gpuArch);
 			return 5;
 		}
 	}
@@ -517,7 +517,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		 */
 		ctx->device_threads = 64;
 		constexpr size_t byteToMiB = 1024u * 1024u;
-		
+
 		// no limit by default 1TiB
 		size_t maxMemUsage = byteToMiB * byteToMiB;
 		if(props.major == 6)
@@ -575,7 +575,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		CUDA_CHECK(ctx->device_id, cudaFree(tmp));
 		// delete created context on the gpu
 		CUDA_CHECK(ctx->device_id, cudaDeviceReset());
-		
+
 		ctx->total_device_memory = totalMemory;
 		ctx->free_device_memory = freeMemory;
 
@@ -614,7 +614,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		size_t perThread = hashMemSize + 16192u + 680u;
 		if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo())
 			perThread += 50 * 4; // state double buffer
-		
+
 		size_t max_intensity = limitedMemory / perThread;
 		ctx->device_threads = max_intensity / ctx->device_blocks;
 		// use only odd number of threads
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
index 055a8bd..4d369f8 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
@@ -36,8 +36,8 @@ __forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
-	} 
-	else 
+	}
+	else
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
index a37934c..d5a98b7 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
@@ -167,7 +167,7 @@ __device__ void cn_groestl_F512(uint32_t * __restrict__ h, const uint32_t * __re
 	uint32_t y[2*GROESTL_COLS512];
 	uint32_t z[2*GROESTL_COLS512];
 
-	for (i = 0; i < 2*GROESTL_COLS512; i++) 
+	for (i = 0; i < 2*GROESTL_COLS512; i++)
 	{
 		z[i] = m[i];
 		Ptmp[i] = h[i]^m[i];
@@ -227,23 +227,23 @@ __device__ void cn_groestl_outputtransformation(groestlHashState *ctx)
 __device__ void cn_groestl_transform(groestlHashState * __restrict__ ctx,
 	const uint8_t * __restrict__ input, int msglen)
 {
-	for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512) 
+	for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512)
 	{
 		cn_groestl_F512(ctx->chaining,(uint32_t*)input);
 		ctx->block_counter1++;
 
-		if (ctx->block_counter1 == 0) 
+		if (ctx->block_counter1 == 0)
 			ctx->block_counter2++;
 	}
 }
 
-__device__ void cn_groestl_final(groestlHashState*  __restrict__ ctx, 
+__device__ void cn_groestl_final(groestlHashState*  __restrict__ ctx,
 	BitSequence* __restrict__  output)
 {
 	int i, j = 0, hashbytelen = GROESTL_HASH_BIT_LEN/8;
 	uint8_t *s = (BitSequence*)ctx->chaining;
 
-	if (ctx->bits_in_last_byte) 
+	if (ctx->bits_in_last_byte)
 	{
 		ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<ctx->bits_in_last_byte)-1)<<(8-ctx->bits_in_last_byte);
 		ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-ctx->bits_in_last_byte);
@@ -254,9 +254,9 @@ __device__ void cn_groestl_final(groestlHashState*  __restrict__ ctx,
 		ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
 	}
 
-	if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) 
+	if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN)
 	{
-		while (ctx->buf_ptr < GROESTL_SIZE512) 
+		while (ctx->buf_ptr < GROESTL_SIZE512)
 			ctx->buffer[(int)ctx->buf_ptr++] = 0;
 
 		cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512);
@@ -300,14 +300,14 @@ __device__ void cn_groestl_update(groestlHashState* __restrict__ ctx,
 	int msglen = (int)(databitlen/8);
 	int rem = (int)(databitlen%8);
 
-	if (ctx->buf_ptr) 
+	if (ctx->buf_ptr)
 	{
 		while (ctx->buf_ptr < GROESTL_SIZE512 && index < msglen)
 			ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
 
-		if (ctx->buf_ptr < GROESTL_SIZE512) 
+		if (ctx->buf_ptr < GROESTL_SIZE512)
 		{
-			if (rem) 
+			if (rem)
 			{
 				ctx->bits_in_last_byte = rem;
 				ctx->buffer[(int)ctx->buf_ptr++] = input[index];
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
index 679046e..284039f 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
@@ -111,7 +111,7 @@ __device__ void cn_jh_E8(jhHashState *state)
 {
 	uint64_t i,roundnumber,temp0,temp1;
 
-	for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) 
+	for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7)
 	{
 		for (i = 0; i < 2; i++)
 		{
@@ -155,13 +155,13 @@ __device__ void cn_jh_E8(jhHashState *state)
 			JH_SWAP32(state->x[1][i]); JH_SWAP32(state->x[3][i]); JH_SWAP32(state->x[5][i]); JH_SWAP32(state->x[7][i]);
 		}
 
-		for (i = 0; i < 2; i++) 
+		for (i = 0; i < 2; i++)
 		{
 			JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+6])[i],((uint64_t *)d_E8_rc[roundnumber+6])[i+2] );
 			JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
 		}
 
-		for (i = 1; i < 8; i = i+2) 
+		for (i = 1; i < 8; i = i+2)
 		{
 			temp0 = state->x[i][0];
 			state->x[i][0] = state->x[i][1];
@@ -190,7 +190,7 @@ __device__ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence
 	state->databitlen += databitlen;
 	index = 0;
 
-	if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  ) 
+	if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  )
 	{
 		if ( (databitlen & 7) == 0 )
 			memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3));
@@ -215,7 +215,7 @@ __device__ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence
 		cn_jh_F8(state);
 	}
 
-	if ( databitlen > 0) 
+	if ( databitlen > 0)
 	{
 		if ((databitlen & 7) == 0)
 			memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
@@ -247,7 +247,7 @@ __device__ void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __re
 		state->buffer[56] = (state->databitlen >> 56) & 0xff;
 		cn_jh_F8(state);
 	}
-	else 
+	else
 	{
 		/*set the rest of the bytes in the buffer to 0*/
 		if ( (state->datasize_in_buffer & 7) == 0)
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
index 041a593..fc45db1 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
@@ -221,7 +221,7 @@ __device__ void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, co
 		ctx->X[7] = X7 ^ w[7];
 
 		ts[1] &= ~SKEIN_T1_FLAG_FIRST;
-	} 
+	}
 	while (--blkCnt);
 
 	ctx->h.T[0] = ts[0];
@@ -239,7 +239,7 @@ __device__ void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __
 
 	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
 
-	if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) 
+	if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
 	{
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
 		//p8 = &ctx->b[ctx->h.bCnt];
@@ -258,7 +258,7 @@ __device__ void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __
 
 	memcpy(X,ctx->X,sizeof(X));
 
-	for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) 
+	for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
 	{
 		((uint64_t *)ctx->b)[0]= (uint64_t)i;
 		Skein_Start_New_Type(ctx,OUT_FINAL);
@@ -275,15 +275,15 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 {
 	size_t n;
 
-	if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) 
+	if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
 	{
 
-		if (ctx->h.bCnt) 
+		if (ctx->h.bCnt)
 		{
 
 			n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
 
-			if (n) 
+			if (n)
 			{
 				memcpy(&ctx->b[ctx->h.bCnt],msg,n);
 				msgByteCnt  -= n;
@@ -295,7 +295,7 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 			ctx->h.bCnt = 0;
 		}
 
-		if (msgByteCnt > SKEIN_512_BLOCK_BYTES) 
+		if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
 		{
 			n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;
 			cn_skein512_processblock(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
@@ -304,7 +304,7 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 		}
 	}
 
-	if (msgByteCnt) 
+	if (msgByteCnt)
 	{
 		memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
 		ctx->h.bCnt += msgByteCnt;
@@ -313,11 +313,11 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 
 __device__ void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
 {
-	if ((databitlen & 7) == 0) 
+	if ((databitlen & 7) == 0)
 	{
 		cn_skein512_update(&state->u.ctx_512,data,databitlen >> 3);
 	}
-	else 
+	else
 	{
 
 		size_t bCnt = (databitlen >> 3) + 1;
diff --git a/xmrstak/backend/plugin.hpp b/xmrstak/backend/plugin.hpp
index 2610db8..1811af2 100644
--- a/xmrstak/backend/plugin.hpp
+++ b/xmrstak/backend/plugin.hpp
@@ -27,7 +27,7 @@ namespace xmrstak
 struct plugin
 {
 
-	plugin(const std::string backendName, const std::string libName) : fn_starterBackend(nullptr), m_backendName(backendName)
+	plugin(const std::string backendName, const std::string libName) : fn_startBackend(nullptr), m_backendName(backendName)
 	{
 #ifdef WIN32
 		libBackend = LoadLibrary(TEXT((libName + ".dll").c_str()));
@@ -59,15 +59,15 @@ struct plugin
 #endif
 
 #ifdef WIN32
-		fn_starterBackend = (starterBackend_t) GetProcAddress(libBackend, "xmrstak_start_backend");
-		if (!fn_starterBackend)
+		fn_startBackend = (startBackend_t) GetProcAddress(libBackend, "xmrstak_start_backend");
+		if (!fn_startBackend)
 		{
 			std::cerr << "WARNING: backend plugin " << libName << " contains no entry 'xmrstak_start_backend': " <<GetLastError()<< std::endl;
 		}
 #else
 		// reset last error
 		dlerror();
-		fn_starterBackend = (starterBackend_t) dlsym(libBackend, "xmrstak_start_backend");
+		fn_startBackend = (startBackend_t) dlsym(libBackend, "xmrstak_start_backend");
 		const char* dlsym_error = dlerror();
 		if(dlsym_error)
 		{
@@ -78,21 +78,21 @@ struct plugin
 
 	std::vector<iBackend*>* startBackend(uint32_t threadOffset, miner_work& pWork, environment& env)
 	{
-		if(fn_starterBackend == nullptr)
+		if(fn_startBackend == nullptr)
 		{
 			std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>();
 			std::cerr << "WARNING: " << m_backendName << " Backend disabled"<< std::endl;
 			return pvThreads;
 		}
 
-		return fn_starterBackend(threadOffset, pWork, env);
+		return fn_startBackend(threadOffset, pWork, env);
 	}
 
 	std::string m_backendName;
 
-	typedef std::vector<iBackend*>* (*starterBackend_t)(uint32_t threadOffset, miner_work& pWork, environment& env);
+	typedef std::vector<iBackend*>* (*startBackend_t)(uint32_t threadOffset, miner_work& pWork, environment& env);
 
-	starterBackend_t fn_starterBackend;
+	startBackend_t fn_startBackend;
 
 #ifdef WIN32
 	HINSTANCE libBackend;
@@ -100,7 +100,7 @@ struct plugin
 	void *libBackend;
 #endif
 
-/* \todo add unload to destructor and change usage of plugin that libs keeped open until the miner endss
+/* \todo add unload to destructor and change usage of plugin so that libs are kept open until the miner ends
 #ifdef WIN32
 	FreeLibrary(libBackend);
 #else
diff --git a/xmrstak/backend/pool_data.hpp b/xmrstak/backend/pool_data.hpp
new file mode 100644
index 0000000..4e92359
--- /dev/null
+++ b/xmrstak/backend/pool_data.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+/** Sentinel meaning "no valid pool id": -1 converted to std::size_t, i.e. its largest value. */
+constexpr static std::size_t invalid_pool_id = static_cast<std::size_t>(-1);
+
+namespace xmrstak
+{
+
+/** Pairs a saved nonce with a pool id; a default-constructed object carries invalid_pool_id. */
+struct pool_data
+{
+	/** Saved nonce value; initialised to 0 by the constructor. */
+	std::uint32_t iSavedNonce;
+	/** Pool id; initialised to invalid_pool_id by the constructor. */
+	std::size_t   pool_id;
+
+	/** Sets the saved nonce to 0 and the pool id to the invalid_pool_id sentinel. */
+	pool_data() : iSavedNonce(0), pool_id(invalid_pool_id)
+	{
+	}
+};
+
+} // namespace xmrstak