diff options
Diffstat (limited to 'xmrstak/backend/cpu')
-rw-r--r-- | xmrstak/backend/cpu/autoAdjust.hpp | 4 | ||||
-rw-r--r-- | xmrstak/backend/cpu/autoAdjustHwloc.hpp | 10 | ||||
-rw-r--r-- | xmrstak/backend/cpu/config.tpl | 16 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_groestl.c | 14 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_groestl.h | 12 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_jh.c | 2 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_keccak.c | 12 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_skein.c | 90 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/c_skein.h | 8 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 89 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 4 | ||||
-rw-r--r-- | xmrstak/backend/cpu/crypto/soft_aes.hpp | 6 | ||||
-rw-r--r-- | xmrstak/backend/cpu/minethd.cpp | 94 | ||||
-rw-r--r-- | xmrstak/backend/cpu/minethd.hpp | 3 |
14 files changed, 225 insertions, 139 deletions
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index 518721a..57dbef0 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -52,7 +52,7 @@ public: std::string conf; - + if(!detectL3Size() || L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048u)) { if(L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048)) @@ -127,7 +127,7 @@ private: if(get_masked(cpu_info[0], 7, 5) != 3) { - printer::inst()->print_msg(L0, "Autoconf failed: Couln't find L3 cache page."); + printer::inst()->print_msg(L0, "Autoconf failed: Couldn't find L3 cache page."); return false; } diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index b1f3914..01d2280 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -37,7 +37,7 @@ public: bool printConfig() { - + hwloc_topology_t topology; hwloc_topology_init(&topology); hwloc_topology_load(topology); @@ -64,8 +64,8 @@ public: throw(std::runtime_error("The CPU doesn't seem to have a cache.")); for(hwloc_obj_t obj : tlcs) - proccessTopLevelCache(obj); - + processTopLevelCache(obj); + for(uint32_t id : results) { conf += std::string(" { \"low_power_mode\" : "); @@ -138,7 +138,7 @@ private: // Top level cache isn't shared with other cores on the same package // This will usually be 1 x L3, but can be 2 x L2 per package - void proccessTopLevelCache(hwloc_obj_t obj) + void processTopLevelCache(hwloc_obj_t obj) { if(obj->attr == nullptr) throw(std::runtime_error("Cache object hasn't got attributes.")); @@ -158,7 +158,7 @@ private: //Try our luck with lower level caches for(size_t i=0; i < obj->arity; i++) - proccessTopLevelCache(obj->children[i]); + processTopLevelCache(obj->children[i]); return; } diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl index cb4b950..2fc9a47 100644 --- a/xmrstak/backend/cpu/config.tpl +++ 
b/xmrstak/backend/cpu/config.tpl @@ -2,25 +2,25 @@ R"===( /* * Thread configuration for each thread. Make sure it matches the number above. * low_power_mode - This can either be a boolean (true or false), or a number between 1 to 5. When set to true, - * this mode will double the cache usage, and double the single thread performance. It will - * consume much less power (as less cores are working), but will max out at around 80-85% of + * this mode will double the cache usage, and double the single thread performance. It will + * consume much less power (as less cores are working), but will max out at around 80-85% of * the maximum performance. When set to a number N greater than 1, this mode will increase the * cache usage and single thread performance by N times. * - * no_prefetch - Some sytems can gain up to extra 5% here, but sometimes it will have no difference or make + * no_prefetch - Some systems can gain up to extra 5% here, but sometimes it will have no difference or make * things slower. * - * affine_to_cpu - This can be either false (no affinity), or the CPU core number. Note that on hyperthreading - * systems it is better to assign threads to physical cores. On Windows this usually means selecting - * even or odd numbered cpu numbers. For Linux it will be usually the lower CPU numbers, so for a 4 + * affine_to_cpu - This can be either false (no affinity), or the CPU core number. Note that on hyperthreading + * systems it is better to assign threads to physical cores. On Windows this usually means selecting + * even or odd numbered cpu numbers. For Linux it will be usually the lower CPU numbers, so for a 4 * physical core CPU you should select cpu numbers 0-3. * * On the first run the miner will look at your system and suggest a basic configuration that will work, * you can try to tweak it from there to get the best performance. 
- * + * * A filled out configuration should look like this: * "cpu_threads_conf" : - * [ + * [ * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 }, * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 }, * ], diff --git a/xmrstak/backend/cpu/crypto/c_groestl.c b/xmrstak/backend/cpu/crypto/c_groestl.c index 1318d5a..5b3523e 100644 --- a/xmrstak/backend/cpu/crypto/c_groestl.c +++ b/xmrstak/backend/cpu/crypto/c_groestl.c @@ -4,7 +4,7 @@ * * This work is based on the implementation of * Soeren S. Thomsen and Krystian Matusiewicz - * + * * */ @@ -22,7 +22,7 @@ const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6}; #define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \ v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \ v1 = temp_var;} - + #define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \ tu = T[2*(uint32_t)x[4*c0+0]]; \ @@ -161,11 +161,11 @@ static void F512(uint32_t *h, const uint32_t *m) { /* digest up to msglen bytes of input (full blocks only) */ static void Transform(groestlHashState *ctx, - const uint8_t *input, + const uint8_t *input, int msglen) { /* digest message, one block at a time */ - for (; msglen >= SIZE512; + for (; msglen >= SIZE512; msglen -= SIZE512, input += SIZE512) { F512(ctx->chaining,(uint32_t*)input); @@ -199,7 +199,7 @@ static void OutputTransformation(groestlHashState *ctx) { RND512P((uint8_t*)y, temp, 0x00000009); for (j = 0; j < 2*COLS512; j++) { ctx->chaining[j] ^= temp[j]; - } + } } /* initialise context */ @@ -313,7 +313,7 @@ static void Final(groestlHashState* ctx, ctx->block_counter2 >>= 8; } /* digest final padding block */ - Transform(ctx, ctx->buffer, SIZE512); + Transform(ctx, ctx->buffer, SIZE512); /* perform output transformation */ OutputTransformation(ctx); @@ -332,7 +332,7 @@ static void Final(groestlHashState* ctx, } /* hash bit sequence */ -void groestl(const BitSequence* data, +void 
groestl(const BitSequence* data, DataLength databitlen, BitSequence* hashval) { diff --git a/xmrstak/backend/cpu/crypto/c_groestl.h b/xmrstak/backend/cpu/crypto/c_groestl.h index 2b51339..47044b4 100644 --- a/xmrstak/backend/cpu/crypto/c_groestl.h +++ b/xmrstak/backend/cpu/crypto/c_groestl.h @@ -4,10 +4,10 @@ #include "crypto_uint8.h" #include "crypto_uint32.h" #include "crypto_uint64.h" -#include "crypto_hash.h" +#include "crypto_hash.h" -typedef crypto_uint8 uint8_t; -typedef crypto_uint32 uint32_t; +typedef crypto_uint8 uint8_t; +typedef crypto_uint32 uint32_t; typedef crypto_uint64 uint64_t; */ #include <stdint.h> @@ -42,7 +42,7 @@ typedef struct { BitSequence buffer[SIZE512]; /* data buffer */ int buf_ptr; /* data buffer pointer */ int bits_in_last_byte; /* no. of message bits in last byte of - data buffer */ + data buffer */ } groestlHashState; /*void Init(hashState*); @@ -53,8 +53,8 @@ void groestl(const BitSequence*, DataLength, BitSequence*); /* int crypto_hash(unsigned char *out, - const unsigned char *in, - unsigned long long len); + const unsigned char *in, + unsigned long long len); */ #endif /* __hash_h */ diff --git a/xmrstak/backend/cpu/crypto/c_jh.c b/xmrstak/backend/cpu/crypto/c_jh.c index 9d685a0..0256a0f 100644 --- a/xmrstak/backend/cpu/crypto/c_jh.c +++ b/xmrstak/backend/cpu/crypto/c_jh.c @@ -234,7 +234,7 @@ static HashReturn Init(hashState *state, int hashbitlen) /*initialize the initial hash value of JH*/ state->hashbitlen = hashbitlen; - /*load the intital hash value into state*/ + /*load the initial hash value into state*/ switch (hashbitlen) { case 224: memcpy(state->x,JH224_H0,128); break; diff --git a/xmrstak/backend/cpu/crypto/c_keccak.c b/xmrstak/backend/cpu/crypto/c_keccak.c index eadb85b..63c1614 100644 --- a/xmrstak/backend/cpu/crypto/c_keccak.c +++ b/xmrstak/backend/cpu/crypto/c_keccak.c @@ -12,14 +12,14 @@ #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) #endif -const uint64_t keccakf_rndc[24] = +const uint64_t 
keccakf_rndc[24] = { 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; @@ -130,7 +130,7 @@ void keccakf(uint64_t st[25], int rounds) st[j + 2] ^= (~bc[3]) & bc[4]; st[j + 3] ^= (~bc[4]) & bc[0]; st[j + 4] ^= (~bc[0]) & bc[1]; - + // Iota st[0] ^= keccakf_rndc[round]; } @@ -147,7 +147,7 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen; rsizw = rsiz / 8; - + memset(st, 0, sizeof(st)); for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) { @@ -155,7 +155,7 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) st[i] ^= ((uint64_t *) in)[i]; keccakf(st, KECCAK_ROUNDS); } - + // last block and padding memcpy(temp, in, inlen); temp[inlen++] = 1; @@ -173,4 +173,4 @@ void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) void keccak1600(const uint8_t *in, int inlen, uint8_t *md) { keccak(in, inlen, md, sizeof(state_t)); -}
\ No newline at end of file +} diff --git a/xmrstak/backend/cpu/crypto/c_skein.c b/xmrstak/backend/cpu/crypto/c_skein.c index 2453713..e2d5442 100644 --- a/xmrstak/backend/cpu/crypto/c_skein.c +++ b/xmrstak/backend/cpu/crypto/c_skein.c @@ -5,7 +5,7 @@ ** Source code author: Doug Whiting, 2008. ** ** This algorithm and source code is released to the public domain. -** +** ************************************************************************/ #define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ @@ -96,12 +96,12 @@ static int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); ** After an InitExt() call, just use Update/Final calls as with Init(). ** ** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. -** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, +** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, ** the results of InitExt() are identical to calling Init(). ** The function Init() may be called once to "precompute" the IV for ** a given hashBitLen value, then by saving a copy of the context ** the IV computation may be avoided in later calls. -** Similarly, the function InitExt() may be called once per MAC key +** Similarly, the function InitExt() may be called once per MAC key ** to precompute the MAC IV, then a copy of the context saved and ** reused for each new MAC computation. **/ @@ -135,7 +135,7 @@ static int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); /***************************************************************** ** "Internal" Skein definitions -** -- not needed for sequential hashing API, but will be +** -- not needed for sequential hashing API, but will be ** helpful for other uses of Skein (e.g., tree hash mode). ** -- included here so that they can be shared between ** reference and optimized code. 
@@ -257,11 +257,11 @@ static int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); #define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ #define Skein_assert(x) #elif defined(SKEIN_ASSERT) -#include <assert.h> -#define Skein_Assert(x,retCode) assert(x) -#define Skein_assert(x) assert(x) +#include <assert.h> +#define Skein_Assert(x,retCode) assert(x) +#define Skein_assert(x) assert(x) #else -#include <assert.h> +#include <assert.h> #define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ #define Skein_assert(x) assert(x) /* internal error */ #endif @@ -269,8 +269,8 @@ static int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); /***************************************************************** ** Skein block function constants (shared across Ref and Opt code) ******************************************************************/ -enum -{ +enum +{ /* Skein_256 round rotation constants */ R_256_0_0=14, R_256_0_1=16, R_256_1_0=52, R_256_1_1=57, @@ -518,7 +518,7 @@ const u64b_t SKEIN1024_IV_1024[] = #define BLK_BITS (WCNT*64) /* some useful definitions for code here */ #define KW_TWK_BASE (0) #define KW_KEY_BASE (3) -#define ks (kw + KW_KEY_BASE) +#define ks (kw + KW_KEY_BASE) #define ts (kw + KW_TWK_BASE) #ifdef SKEIN_DEBUG @@ -567,7 +567,7 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ - ks[0] = ctx->X[0]; + ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; @@ -594,7 +594,7 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ -#if SKEIN_UNROLL_256 == 0 +#if SKEIN_UNROLL_256 == 0 #define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \ Round256(p0,p1,p2,p3,ROT,rNum) \ 
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); @@ -620,8 +620,8 @@ static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,s Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */ -#endif - { +#endif + { #define R256_8_rounds(R) \ R256(0,1,2,3,R_256_0,8*(R) + 1); \ R256(0,3,2,1,R_256_1,8*(R) + 2); \ @@ -762,7 +762,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s ks[5] = ctx->X[5]; ks[6] = ctx->X[6]; ks[7] = ctx->X[7]; - ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; @@ -790,7 +790,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ -#if SKEIN_UNROLL_512 == 0 +#if SKEIN_UNROLL_512 == 0 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); @@ -1022,7 +1022,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \ X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \ -#if SKEIN_UNROLL_1024 == 0 +#if SKEIN_UNROLL_1024 == 0 #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr); @@ -1044,7 +1044,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \ X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \ X15 += ks[((R)+16) % 17] + (R)+1; \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); #else /* looping 
version */ #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ @@ -1072,7 +1072,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */ -#endif +#endif { #define R1024_8_rounds(R) /* do 8 full rounds */ \ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \ @@ -1156,7 +1156,7 @@ static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,s ctx->X[15] = X15 ^ w[15]; Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); - + ts[1] &= ~SKEIN_T1_FLAG_FIRST; blkPtr += SKEIN1024_BLOCK_BYTES; } @@ -1193,7 +1193,7 @@ static int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) u08b_t b[SKEIN_256_STATE_BYTES]; u64b_t w[SKEIN_256_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ @@ -1237,13 +1237,13 @@ static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree u08b_t b[SKEIN_256_STATE_BYTES]; u64b_t w[SKEIN_256_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) /* is there a key? 
*/ - { + { memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ } else /* here to pre-process a key */ @@ -1282,7 +1282,7 @@ static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree /* Set up to process the data message portion of the hash (default) */ ctx->h.bCnt = 0; /* buffer b[] starts out empty */ Skein_Start_New_Type(ctx,MSG); - + return SKEIN_SUCCESS; } #endif @@ -1334,7 +1334,7 @@ static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msg return SKEIN_SUCCESS; } - + /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /* finalize the hash computation and output the result */ static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) @@ -1348,7 +1348,7 @@ static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + /* now output the result */ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ @@ -1391,7 +1391,7 @@ static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) u08b_t b[SKEIN_512_STATE_BYTES]; u64b_t w[SKEIN_512_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ @@ -1437,13 +1437,13 @@ static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree u08b_t b[SKEIN_512_STATE_BYTES]; u64b_t w[SKEIN_512_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) /* is there a key? 
*/ - { + { memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ } else /* here to pre-process a key */ @@ -1482,7 +1482,7 @@ static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree /* Set up to process the data message portion of the hash (default) */ ctx->h.bCnt = 0; /* buffer b[] starts out empty */ Skein_Start_New_Type(ctx,MSG); - + return SKEIN_SUCCESS; } #endif @@ -1534,7 +1534,7 @@ static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msg return SKEIN_SUCCESS; } - + /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /* finalize the hash computation and output the result */ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) @@ -1548,7 +1548,7 @@ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + /* now output the result */ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ @@ -1590,7 +1590,7 @@ static int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) u08b_t b[SKEIN1024_STATE_BYTES]; u64b_t w[SKEIN1024_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ @@ -1635,13 +1635,13 @@ static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree u08b_t b[SKEIN1024_STATE_BYTES]; u64b_t w[SKEIN1024_STATE_WORDS]; } cfg; /* config block */ - + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) /* is there a key? 
*/ - { + { memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ } else /* here to pre-process a key */ @@ -1680,7 +1680,7 @@ static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t tree /* Set up to process the data message portion of the hash (default) */ ctx->h.bCnt = 0; /* buffer b[] starts out empty */ Skein_Start_New_Type(ctx,MSG); - + return SKEIN_SUCCESS; } #endif @@ -1732,7 +1732,7 @@ static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msg return SKEIN_SUCCESS; } - + /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /* finalize the hash computation and output the result */ static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) @@ -1746,7 +1746,7 @@ static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + /* now output the result */ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ @@ -1790,9 +1790,9 @@ static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */ - + return SKEIN_SUCCESS; } @@ -1806,9 +1806,9 @@ static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */ - + return SKEIN_SUCCESS; } @@ -1822,9 +1822,9 @@ 
static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */ - + return SKEIN_SUCCESS; } diff --git a/xmrstak/backend/cpu/crypto/c_skein.h b/xmrstak/backend/cpu/crypto/c_skein.h index 86dbc08..1aa11de 100644 --- a/xmrstak/backend/cpu/crypto/c_skein.h +++ b/xmrstak/backend/cpu/crypto/c_skein.h @@ -9,7 +9,7 @@ ** This algorithm and source code is released to the public domain. ** *************************************************************************** -** +** ** The following compile-time switches may be defined to control some ** tradeoffs between speed, code size, error checking, and security. ** @@ -20,8 +20,8 @@ ** [default: no callouts (no overhead)] ** ** SKEIN_ERR_CHECK -- how error checking is handled inside Skein -** code. If not defined, most error checking -** is disabled (for performance). Otherwise, +** code. If not defined, most error checking +** is disabled (for performance). 
Otherwise, ** the switch value is interpreted as: ** 0: use assert() to flag errors ** 1: return SKEIN_FAIL to flag errors @@ -42,6 +42,6 @@ typedef u08b_t SkeinBitSequence; /* bit stream type */ /* "all-in-one" call */ SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, - SkeinDataLength databitlen, SkeinBitSequence *hashval); + SkeinDataLength databitlen, SkeinBitSequence *hashval); #endif /* ifndef _SKEIN_H_ */ diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 7562de1..e15c474 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -28,7 +28,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) *hi = r >> 64; return (uint64_t)r; } -#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + #else #include <intrin.h> #endif // __GNUC__ @@ -422,6 +422,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } +template<xmrstak_algo ALGO> inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) { mem_out[0] = _mm_cvtsi128_si64(tmp); @@ -431,10 +432,21 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) uint8_t x = static_cast<uint8_t>(vh >> 24); static const uint16_t table = 0x7531; - const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc) + { + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; + } + else if(ALGO == cryptonight_stellite) + { + const uint8_t index = (((x >> 4) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; + } - mem_out[1] = vh; } template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> @@ -444,7 +456,7 @@ void cryptonight_hash(const void* 
input, size_t len, void* output, cryptonight_c constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32); return; @@ -453,7 +465,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c keccak((const uint8_t *)input, len, ctx0->hash_state, 200); uint64_t monero_const; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) { monero_const = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24); @@ -482,8 +494,8 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -506,8 +518,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); ah0 += lo; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l0[idx0 & MASK])[1] = 
ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0]; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; + } else ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ah0 ^= ch; @@ -544,7 +561,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 64); return; @@ -554,7 +571,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200); uint64_t monero_const_0, monero_const_1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) { monero_const_0 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const_0 ^= *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24); @@ -592,8 +609,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -610,8 +627,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); - if(ALGO == cryptonight_monero || ALGO == 
cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); else _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); @@ -631,8 +648,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = axl0; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0]; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; + } else ((uint64_t*)&l0[idx0 & MASK])[1] = axh0; @@ -662,8 +684,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh1 += lo; ((uint64_t*)&l1[idx1 & MASK])[0] = axl1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0]; + else + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; + } else ((uint64_t*)&l1[idx1 & MASK])[1] = axh1; @@ -701,7 +728,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ - c = _mm_load_si128(ptr); + c = _mm_load_si128(ptr); #define CN_STEP2(a, b, c, l, ptr, idx) \ if(SOFT_AES) \ @@ -709,8 +736,8 @@ 
void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else \ c = _mm_aesenc_si128(c, a); \ b = _mm_xor_si128(b, c); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \ - cryptonight_monero_tweak((uint64_t*)ptr, b); \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \ else \ _mm_store_si128(ptr, b);\ @@ -724,8 +751,12 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto #define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + { \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ + if (ALGO == cryptonight_ipbc) \ + ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\ + } \ else \ _mm_store_si128(ptr, a);\ a = _mm_xor_si128(a, b); \ @@ -751,7 +782,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 3); return; @@ -845,7 +876,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 4); return; 
@@ -883,13 +914,13 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni __m128i cx1 = _mm_set_epi64x(0, 0); __m128i cx2 = _mm_set_epi64x(0, 0); __m128i cx3 = _mm_set_epi64x(0, 0); - + uint64_t idx0, idx1, idx2, idx3; idx0 = _mm_cvtsi128_si64(ax0); idx1 = _mm_cvtsi128_si64(ax1); idx2 = _mm_cvtsi128_si64(ax2); idx3 = _mm_cvtsi128_si64(ax3); - + for (size_t i = 0; i < ITERATIONS/2; i++) { uint64_t hi, lo; @@ -954,7 +985,7 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 5); return; diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index ee3b663..a478c9b 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -118,7 +118,7 @@ BOOL AddLargePageRights() DWORD size = 0; GetTokenInformation(hToken, TokenUser, NULL, 0, &size); - + if (size > 0 && bIsElevated) { user = (PTOKEN_USER)LocalAlloc(LPTR, size); @@ -136,7 +136,7 @@ BOOL AddLargePageRights() ZeroMemory(&attributes, sizeof(attributes)); BOOL result = FALSE; - if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) + if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { LSA_UNICODE_STRING lockmem; lockmem.Buffer = L"SeLockMemoryPrivilege"; diff --git a/xmrstak/backend/cpu/crypto/soft_aes.hpp b/xmrstak/backend/cpu/crypto/soft_aes.hpp index d3f4637..9b4ae0a 100644 --- a/xmrstak/backend/cpu/crypto/soft_aes.hpp +++ b/xmrstak/backend/cpu/crypto/soft_aes.hpp @@ -104,9 +104,9 @@ static inline __m128i soft_aesenc(__m128i in, __m128i key) static inline uint32_t 
sub_word(uint32_t key) { - return (saes_sbox[key >> 24 ] << 24) | - (saes_sbox[(key >> 16) & 0xff] << 16 ) | - (saes_sbox[(key >> 8) & 0xff] << 8 ) | + return (saes_sbox[key >> 24 ] << 24) | + (saes_sbox[(key >> 16) & 0xff] << 16 ) | + (saes_sbox[(key >> 8) & 0xff] << 8 ) | saes_sbox[key & 0xff]; } diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index f8f70f9..482c085 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -95,6 +95,7 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id return pthread_setaffinity_np(h, sizeof(cpuset_t), &mn) == 0; #elif defined(__OpenBSD__) printer::inst()->print_msg(L0,"WARNING: thread pinning is not supported under OPENBSD."); + return true; #else cpu_set_t mn; CPU_ZERO(&mn); @@ -285,7 +286,12 @@ bool minethd::self_test() else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon) { } - + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_ipbc) + { + } + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_stellite) + { + } for (int i = 0; i < MAX_N; i++) cryptonight_free_ctx(ctx[i]); @@ -333,7 +339,7 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work } else printer::inst()->print_msg(L1, "Starting %dx thread, no affinity.", cfg.iMultiway); - + minethd* thd = new minethd(pWork, i + threadOffset, cfg.iMultiway, cfg.bNoPrefetch, cfg.iCpuAff); pvThreads.push_back(thd); } @@ -341,13 +347,6 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work return pvThreads; } -void minethd::consume_work() -{ - memcpy(&oWork, &globalStates::inst().inst().oGlobalWork, sizeof(miner_work)); - iJobNo++; - globalStates::inst().inst().iConsumeCnt++; -} - minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo) { // We have 
two independent flag bits in the functions @@ -372,6 +371,12 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr case cryptonight_aeon: algv = 4; break; + case cryptonight_ipbc: + algv = 5; + break; + case cryptonight_stellite: + algv = 6; + break; default: algv = 2; break; @@ -397,7 +402,15 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr cryptonight_hash<cryptonight_aeon, false, false>, cryptonight_hash<cryptonight_aeon, true, false>, cryptonight_hash<cryptonight_aeon, false, true>, - cryptonight_hash<cryptonight_aeon, true, true> + cryptonight_hash<cryptonight_aeon, true, true>, + cryptonight_hash<cryptonight_ipbc, false, false>, + cryptonight_hash<cryptonight_ipbc, true, false>, + cryptonight_hash<cryptonight_ipbc, false, true>, + cryptonight_hash<cryptonight_ipbc, true, true>, + cryptonight_hash<cryptonight_stellite, false, false>, + cryptonight_hash<cryptonight_stellite, true, false>, + cryptonight_hash<cryptonight_stellite, false, true>, + cryptonight_hash<cryptonight_stellite, true, true> }; std::bitset<2> digit; @@ -430,7 +443,6 @@ void minethd::work_main() piHashVal = (uint64_t*)(result.bResult + 24); piNonce = (uint32_t*)(oWork.bWorkBlob + 39); - globalStates::inst().inst().iConsumeCnt++; result.iThreadId = iThreadNo; uint8_t version = 0; @@ -448,7 +460,7 @@ void minethd::work_main() while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - consume_work(); + globalStates::inst().consume_work(oWork, iJobNo); continue; } @@ -491,6 +503,9 @@ void minethd::work_main() if((nonce_ctr++ & (nonce_chunk-1)) == 0) { globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk); + // check if the job is still valid, there is a small possibility that the job is switched + if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo) + break; } *piNonce = result.iNonce; @@
-504,7 +519,7 @@ void minethd::work_main() std::this_thread::yield(); } - consume_work(); + globalStates::inst().consume_work(oWork, iJobNo); } cryptonight_free_ctx(ctx); @@ -534,6 +549,12 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, case cryptonight_aeon: algv = 4; break; + case cryptonight_ipbc: + algv = 5; + break; + case cryptonight_stellite: + algv = 6; + break; default: algv = 2; break; @@ -573,7 +594,7 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash<cryptonight_lite, true, false>, cryptonight_penta_hash<cryptonight_lite, false, true>, cryptonight_penta_hash<cryptonight_lite, true, true>, - + cryptonight_double_hash<cryptonight, false, false>, cryptonight_double_hash<cryptonight, true, false>, cryptonight_double_hash<cryptonight, false, true>, @@ -623,13 +644,47 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash<cryptonight_aeon, false, false>, cryptonight_penta_hash<cryptonight_aeon, true, false>, cryptonight_penta_hash<cryptonight_aeon, false, true>, - cryptonight_penta_hash<cryptonight_aeon, true, true> + cryptonight_penta_hash<cryptonight_aeon, true, true>, + + cryptonight_double_hash<cryptonight_ipbc, false, false>, + cryptonight_double_hash<cryptonight_ipbc, true, false>, + cryptonight_double_hash<cryptonight_ipbc, false, true>, + cryptonight_double_hash<cryptonight_ipbc, true, true>, + cryptonight_triple_hash<cryptonight_ipbc, false, false>, + cryptonight_triple_hash<cryptonight_ipbc, true, false>, + cryptonight_triple_hash<cryptonight_ipbc, false, true>, + cryptonight_triple_hash<cryptonight_ipbc, true, true>, + cryptonight_quad_hash<cryptonight_ipbc, false, false>, + cryptonight_quad_hash<cryptonight_ipbc, true, false>, + cryptonight_quad_hash<cryptonight_ipbc, false, true>, + cryptonight_quad_hash<cryptonight_ipbc, true, true>, + cryptonight_penta_hash<cryptonight_ipbc, false, false>, + 
cryptonight_penta_hash<cryptonight_ipbc, true, false>, + cryptonight_penta_hash<cryptonight_ipbc, false, true>, + cryptonight_penta_hash<cryptonight_ipbc, true, true>, + + cryptonight_double_hash<cryptonight_stellite, false, false>, + cryptonight_double_hash<cryptonight_stellite, true, false>, + cryptonight_double_hash<cryptonight_stellite, false, true>, + cryptonight_double_hash<cryptonight_stellite, true, true>, + cryptonight_triple_hash<cryptonight_stellite, false, false>, + cryptonight_triple_hash<cryptonight_stellite, true, false>, + cryptonight_triple_hash<cryptonight_stellite, false, true>, + cryptonight_triple_hash<cryptonight_stellite, true, true>, + cryptonight_quad_hash<cryptonight_stellite, false, false>, + cryptonight_quad_hash<cryptonight_stellite, true, false>, + cryptonight_quad_hash<cryptonight_stellite, false, true>, + cryptonight_quad_hash<cryptonight_stellite, true, true>, + cryptonight_penta_hash<cryptonight_stellite, false, false>, + cryptonight_penta_hash<cryptonight_stellite, true, false>, + cryptonight_penta_hash<cryptonight_stellite, false, true>, + cryptonight_penta_hash<cryptonight_stellite, true, true>, }; std::bitset<2> digit; digit.set(0, !bHaveAes); digit.set(1, !bNoPrefetch); - + return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()]; } @@ -713,7 +768,7 @@ void minethd::multiway_work_main() while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - consume_work(); + globalStates::inst().consume_work(oWork, iJobNo); prep_multiway_work<N>(bWorkBlob, piNonce); continue; } @@ -758,6 +813,9 @@ void minethd::multiway_work_main() { globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk); nonce_ctr = nonce_chunk; + // check if the job is still valid, there is a small possibility that the job is switched + if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo) + break; } for (size_t i = 0; i < N; i++) @@
-776,7 +834,7 @@ void minethd::multiway_work_main() std::this_thread::yield(); } - consume_work(); + globalStates::inst().consume_work(oWork, iJobNo); prep_multiway_work<N>(bWorkBlob, piNonce); } diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 85a95d1..2d40ce3 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -47,11 +47,8 @@ private: void quad_work_main(); void penta_work_main(); - void consume_work(); - uint64_t iJobNo; - static miner_work oGlobalWork; miner_work oWork; std::promise<void> order_fix; |