summaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r--crypto/cryptonight.h5
-rw-r--r--crypto/cryptonight_aesni.h37
-rw-r--r--crypto/cryptonight_common.cpp20
3 files changed, 32 insertions, 30 deletions
diff --git a/crypto/cryptonight.h b/crypto/cryptonight.h
index f4ec843..978c798 100644
--- a/crypto/cryptonight.h
+++ b/crypto/cryptonight.h
@@ -24,11 +24,6 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg);
cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg);
void cryptonight_free_ctx(cryptonight_ctx* ctx);
-void cryptonight_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* ctx);
-void cryptonight_hash_ctx_soft(const void* input, size_t len, void* output, cryptonight_ctx* ctx);
-void cryptonight_hash_ctx_np(const void* input, size_t len, void* output, cryptonight_ctx* ctx);
-void cryptonight_double_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1);
-
#ifdef __cplusplus
}
#endif
diff --git a/crypto/cryptonight_aesni.h b/crypto/cryptonight_aesni.h
index 37672c6..de0f186 100644
--- a/crypto/cryptonight_aesni.h
+++ b/crypto/cryptonight_aesni.h
@@ -279,7 +279,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
_mm_store_si128(output + 11, xout7);
}
-template<size_t ITERATIONS, size_t MEM, bool PREFETCH, bool SOFT_AES>
+template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -301,20 +301,28 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
{
__m128i cx;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
+
if(SOFT_AES)
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
+
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
- lo = _umul128(idx0, cl, &hi);
+
+ if(MULX)
+ lo = _mulx_u64(idx0, cl, (long long unsigned int*)&hi);
+ else
+ lo = _umul128(idx0, cl, &hi);
+
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
@@ -322,6 +330,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
+
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
}
@@ -338,7 +347,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<size_t ITERATIONS, size_t MEM, bool PREFETCH, bool SOFT_AES>
+template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -366,43 +375,61 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
{
__m128i cx;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
+
if(SOFT_AES)
cx = soft_aesenc(cx, ax0);
else
cx = _mm_aesenc_si128(cx, ax0);
+
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
+
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]);
+
if(SOFT_AES)
cx = soft_aesenc(cx, ax1);
else
cx = _mm_aesenc_si128(cx, ax1);
+
_mm_store_si128((__m128i *)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx));
idx1 = _mm_cvtsi128_si64(cx);
bx1 = cx;
+
if(PREFETCH)
_mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0);
uint64_t hi, lo;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
- lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
+
+ if(MULX)
+ lo = _mulx_u64(idx0, _mm_cvtsi128_si64(cx), (long long unsigned int*)&hi);
+ else
+ lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
+
ax0 = _mm_add_epi64(ax0, _mm_set_epi64x(lo, hi));
_mm_store_si128((__m128i*)&l0[idx0 & 0x1FFFF0], ax0);
ax0 = _mm_xor_si128(ax0, cx);
idx0 = _mm_cvtsi128_si64(ax0);
+
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]);
- lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
+
+ if(MULX)
+ lo = _mulx_u64(idx1, _mm_cvtsi128_si64(cx), (long long unsigned int*)&hi);
+ else
+ lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
+
ax1 = _mm_add_epi64(ax1, _mm_set_epi64x(lo, hi));
_mm_store_si128((__m128i*)&l1[idx1 & 0x1FFFF0], ax1);
ax1 = _mm_xor_si128(ax1, cx);
idx1 = _mm_cvtsi128_si64(ax1);
+
if(PREFETCH)
_mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0);
}
diff --git a/crypto/cryptonight_common.cpp b/crypto/cryptonight_common.cpp
index bbcff34..63ce3a4 100644
--- a/crypto/cryptonight_common.cpp
+++ b/crypto/cryptonight_common.cpp
@@ -190,23 +190,3 @@ void cryptonight_free_ctx(cryptonight_ctx* ctx)
_mm_free(ctx);
}
-
-void cryptonight_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* ctx)
-{
- cryptonight_hash<0x80000, MEMORY, true, false>(input, len, output, ctx);
-}
-
-void cryptonight_hash_ctx_soft(const void* input, size_t len, void* output, cryptonight_ctx* ctx)
-{
- cryptonight_hash<0x80000, MEMORY, true, true>(input, len, output, ctx);
-}
-
-void cryptonight_hash_ctx_np(const void* input, size_t len, void* output, cryptonight_ctx* ctx)
-{
- cryptonight_hash<0x80000, MEMORY, false, false>(input, len, output, ctx);
-}
-
-void cryptonight_double_hash_ctx(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1)
-{
- cryptonight_double_hash<0x80000, MEMORY, false, false>(input, len, output, ctx0, ctx1);
-}
OpenPOWER on IntegriCloud