diff options
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/cryptonight.h | 5 | ||||
-rw-r--r-- | crypto/cryptonight_aesni.h | 37 | ||||
-rw-r--r-- | crypto/cryptonight_common.cpp | 20 |
3 files changed, 32 insertions, 30 deletions
diff --git a/crypto/cryptonight.h b/crypto/cryptonight.h index f4ec843..978c798 100644 --- a/crypto/cryptonight.h +++ b/crypto/cryptonight.h @@ -24,11 +24,6 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg); cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg); void cryptonight_free_ctx(cryptonight_ctx* ctx); -void cryptonight_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* ctx); -void cryptonight_hash_ctx_soft(const void* input, size_t len, void* output, cryptonight_ctx* ctx); -void cryptonight_hash_ctx_np(const void* input, size_t len, void* output, cryptonight_ctx* ctx); -void cryptonight_double_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1); - #ifdef __cplusplus } #endif diff --git a/crypto/cryptonight_aesni.h b/crypto/cryptonight_aesni.h index 37672c6..de0f186 100644 --- a/crypto/cryptonight_aesni.h +++ b/crypto/cryptonight_aesni.h @@ -279,7 +279,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } -template<size_t ITERATIONS, size_t MEM, bool PREFETCH, bool SOFT_AES> +template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX> void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0) { keccak((const uint8_t *)input, len, ctx0->hash_state, 200); @@ -301,20 +301,28 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c { __m128i cx; cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); + if(SOFT_AES) cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); idx0 = _mm_cvtsi128_si64(cx); bx0 = cx; + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); uint64_t hi, lo, cl, ch; cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); + + if(MULX) + lo = _mulx_u64(idx0, cl, (long long unsigned int*)&hi); + else + lo = _umul128(idx0, cl, &hi); + al0 += hi; ah0 += lo; ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; @@ -322,6 +330,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c ah0 ^= ch; al0 ^= cl; idx0 = al0; + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); } @@ -338,7 +347,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c // This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon // to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output // We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons) -template<size_t ITERATIONS, size_t MEM, bool PREFETCH, bool SOFT_AES> +template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX> void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1) { keccak((const uint8_t *)input, len, ctx0->hash_state, 200); @@ -366,43 +375,61 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto { __m128i cx; cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); + if(SOFT_AES) cx = soft_aesenc(cx, ax0); else cx = _mm_aesenc_si128(cx, ax0); + _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); idx0 = _mm_cvtsi128_si64(cx); bx0 = cx; + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]); + if(SOFT_AES) cx = soft_aesenc(cx, ax1); else cx = _mm_aesenc_si128(cx, ax1); + _mm_store_si128((__m128i *)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx)); idx1 = _mm_cvtsi128_si64(cx); bx1 = cx; + if(PREFETCH) _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0); uint64_t hi, lo; cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); - lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi); + + if(MULX) + lo = _mulx_u64(idx0, _mm_cvtsi128_si64(cx), (long long unsigned int*)&hi); + else + lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi); + ax0 = _mm_add_epi64(ax0, _mm_set_epi64x(lo, hi)); _mm_store_si128((__m128i*)&l0[idx0 & 0x1FFFF0], ax0); ax0 = _mm_xor_si128(ax0, cx); idx0 = _mm_cvtsi128_si64(ax0); + if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]); - lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi); + + if(MULX) + lo = _mulx_u64(idx1, _mm_cvtsi128_si64(cx), (long long unsigned int*)&hi); + else + lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi); + ax1 = _mm_add_epi64(ax1, _mm_set_epi64x(lo, hi)); _mm_store_si128((__m128i*)&l1[idx1 & 0x1FFFF0], ax1); ax1 = _mm_xor_si128(ax1, cx); idx1 = _mm_cvtsi128_si64(ax1); + if(PREFETCH) _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0); } diff --git a/crypto/cryptonight_common.cpp b/crypto/cryptonight_common.cpp index bbcff34..63ce3a4 100644 --- a/crypto/cryptonight_common.cpp +++ b/crypto/cryptonight_common.cpp @@ -190,23 +190,3 @@ void cryptonight_free_ctx(cryptonight_ctx* ctx) _mm_free(ctx); } - -void cryptonight_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* ctx) -{ - cryptonight_hash<0x80000, MEMORY, true, false>(input, len, output, ctx); -} - -void cryptonight_hash_ctx_soft(const void* input, size_t len, void* output, cryptonight_ctx* ctx) -{ - cryptonight_hash<0x80000, MEMORY, true, true>(input, len, output, ctx); -} - -void cryptonight_hash_ctx_np(const void* input, size_t len, void* output, cryptonight_ctx* ctx) -{ - cryptonight_hash<0x80000, MEMORY, false, false>(input, len, output, ctx); -} - -void cryptonight_double_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1) -{ - cryptonight_double_hash<0x80000, MEMORY, false, false>(input, len, output, ctx0, ctx1); -} |