summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
diff options
context:
space:
mode:
Diffstat (limited to 'xmrstak/backend/cpu/crypto/cryptonight_aesni.h')
-rw-r--r--xmrstak/backend/cpu/crypto/cryptonight_aesni.h89
1 files changed, 60 insertions, 29 deletions
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 7562de1..e15c474 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -28,7 +28,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
*hi = r >> 64;
return (uint64_t)r;
}
-#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
+
#else
#include <intrin.h>
#endif // __GNUC__
@@ -422,6 +422,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
_mm_store_si128(output + 11, xout7);
}
+template<xmrstak_algo ALGO>
inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = _mm_cvtsi128_si64(tmp);
@@ -431,10 +432,21 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
uint8_t x = static_cast<uint8_t>(vh >> 24);
static const uint16_t table = 0x7531;
- const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
- vh ^= ((table >> index) & 0x3) << 28;
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc)
+ {
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+ }
+ else if(ALGO == cryptonight_stellite)
+ {
+ const uint8_t index = (((x >> 4) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+ }
- mem_out[1] = vh;
}
template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
@@ -444,7 +456,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
constexpr size_t MEM = cn_select_memory<ALGO>();
- if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+ if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
{
memset(output, 0, 32);
return;
@@ -453,7 +465,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
uint64_t monero_const;
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
{
monero_const = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24);
@@ -482,8 +494,8 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
else
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
@@ -506,8 +518,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
_mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0);
ah0 += lo;
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ {
+ if(ALGO == cryptonight_ipbc)
+ ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0];
+ else
+ ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+ }
else
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
@@ -544,7 +561,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
constexpr size_t MEM = cn_select_memory<ALGO>();
- if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+ if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
{
memset(output, 0, 64);
return;
@@ -554,7 +571,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200);
uint64_t monero_const_0, monero_const_1;
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
{
monero_const_0 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
monero_const_0 ^= *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24);
@@ -592,8 +609,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
else
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
@@ -610,8 +627,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
else
_mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
@@ -631,8 +648,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
axh0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ {
+ if(ALGO == cryptonight_ipbc)
+ ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0];
+ else
+ ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+ }
else
((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
@@ -662,8 +684,13 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
axh1 += lo;
((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon)
- ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
+ {
+ if(ALGO == cryptonight_ipbc)
+ ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0];
+ else
+ ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+ }
else
((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
@@ -701,7 +728,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
ptr = (__m128i *)&l[idx & MASK]; \
if(PREFETCH) \
_mm_prefetch((const char*)ptr, _MM_HINT_T0); \
- c = _mm_load_si128(ptr);
+ c = _mm_load_si128(ptr);
#define CN_STEP2(a, b, c, l, ptr, idx) \
if(SOFT_AES) \
@@ -709,8 +736,8 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else \
c = _mm_aesenc_si128(c, a); \
b = _mm_xor_si128(b, c); \
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \
- cryptonight_monero_tweak((uint64_t*)ptr, b); \
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \
+ cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \
else \
_mm_store_si128(ptr, b);\
@@ -724,8 +751,12 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
- if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon) \
+ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \
+ { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
+ if (ALGO == cryptonight_ipbc) \
+ ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\
+ } \
else \
_mm_store_si128(ptr, a);\
a = _mm_xor_si128(a, b); \
@@ -751,7 +782,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
constexpr size_t MEM = cn_select_memory<ALGO>();
- if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+ if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
{
memset(output, 0, 32 * 3);
return;
@@ -845,7 +876,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
constexpr size_t MEM = cn_select_memory<ALGO>();
- if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+ if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
{
memset(output, 0, 32 * 4);
return;
@@ -883,13 +914,13 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
-
+
uint64_t idx0, idx1, idx2, idx3;
idx0 = _mm_cvtsi128_si64(ax0);
idx1 = _mm_cvtsi128_si64(ax1);
idx2 = _mm_cvtsi128_si64(ax2);
idx3 = _mm_cvtsi128_si64(ax3);
-
+
for (size_t i = 0; i < ITERATIONS/2; i++)
{
uint64_t hi, lo;
@@ -954,7 +985,7 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
constexpr size_t MEM = cn_select_memory<ALGO>();
- if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+ if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43)
{
memset(output, 0, 32 * 5);
return;
OpenPOWER on IntegriCloud