diff options
Diffstat (limited to 'xmrstak/backend/cpu/crypto/cryptonight_aesni.h')
-rw-r--r-- | xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 89 |
1 files changed, 60 insertions, 29 deletions
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 7562de1..e15c474 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -28,7 +28,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) *hi = r >> 64; return (uint64_t)r; } -#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + #else #include <intrin.h> #endif // __GNUC__ @@ -422,6 +422,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } +template<xmrstak_algo ALGO> inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) { mem_out[0] = _mm_cvtsi128_si64(tmp); @@ -431,10 +432,21 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) uint8_t x = static_cast<uint8_t>(vh >> 24); static const uint16_t table = 0x7531; - const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc) + { + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; + } + else if(ALGO == cryptonight_stellite) + { + const uint8_t index = (((x >> 4) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; + } - mem_out[1] = vh; } template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> @@ -444,7 +456,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32); return; @@ -453,7 +465,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c keccak((const uint8_t *)input, len, ctx0->hash_state, 200); uint64_t monero_const; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) { monero_const = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24); @@ -482,8 +494,8 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -506,8 +518,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); ah0 += lo; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0]; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; + } else ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ah0 ^= ch; @@ -544,7 +561,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 64); return; @@ -554,7 +571,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200); uint64_t monero_const_0, monero_const_1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) { monero_const_0 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const_0 ^= *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24); @@ -592,8 +609,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -610,8 +627,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); else _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); @@ -631,8 +648,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = axl0; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0]; + else + ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; + } else ((uint64_t*)&l0[idx0 & MASK])[1] = axh0; @@ -662,8 +684,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh1 += lo; ((uint64_t*)&l1[idx1 & MASK])[0] = axl1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) - ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + { + if(ALGO == cryptonight_ipbc) + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0]; + else + ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; + } else ((uint64_t*)&l1[idx1 & MASK])[1] = axh1; @@ -701,7 +728,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ - c = _mm_load_si128(ptr); + c = _mm_load_si128(ptr); #define CN_STEP2(a, b, c, l, ptr, idx) \ if(SOFT_AES) \ @@ -709,8 +736,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else \ c = _mm_aesenc_si128(c, a); \ b = _mm_xor_si128(b, c); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \ - cryptonight_monero_tweak((uint64_t*)ptr, b); \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \ else \ _mm_store_si128(ptr, b);\ @@ -724,8 +751,12 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto #define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + { \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ + if (ALGO == cryptonight_ipbc) \ + ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\ + } \ else \ _mm_store_si128(ptr, a);\ a = _mm_xor_si128(a, b); \ @@ -751,7 +782,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 3); return; @@ -845,7 +876,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 4); return; @@ -883,13 +914,13 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni __m128i cx1 = _mm_set_epi64x(0, 0); __m128i cx2 = _mm_set_epi64x(0, 0); __m128i cx3 = _mm_set_epi64x(0, 0); - + uint64_t idx0, idx1, idx2, idx3; idx0 = _mm_cvtsi128_si64(ax0); idx1 = _mm_cvtsi128_si64(ax1); idx2 = _mm_cvtsi128_si64(ax2); idx3 = _mm_cvtsi128_si64(ax3); - + for (size_t i = 0; i < ITERATIONS/2; i++) { uint64_t hi, lo; @@ -954,7 +985,7 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) { memset(output, 0, 32 * 5); return; |