summary | refs | log | tree | commit | diff | stats
path: root/xmrstak/backend/cpu/crypto
diff options
context:
space:
mode:
author: psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-24 21:27:01 +0200
committer: psychocrypt <psychocrypt@users.noreply.github.com> 2017-10-27 20:12:38 +0200
commit: 41451d91e8e50f2b6c4b07630987dbc5a58aecb9 (patch)
tree: 8d80830627ab256365434c649ddf9e0ceb93dca0 /xmrstak/backend/cpu/crypto
parent: 89749c32b736fd581d66441e93ca71c39707fcb2 (diff)
download: xmr-stak-41451d91e8e50f2b6c4b07630987dbc5a58aecb9.zip
download: xmr-stak-41451d91e8e50f2b6c4b07630987dbc5a58aecb9.tar.gz
add aeon support to backend cpu
- update auto suggestion default and hwloc
- extend hash function table to support aeon and xmr within one miner
Diffstat (limited to 'xmrstak/backend/cpu/crypto')
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight.h | 2
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 52
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 38
3 files changed, 56 insertions, 36 deletions
diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h
index 978c798..d07050e 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight.h
@@ -7,8 +7,8 @@ extern "C" {
#include <stddef.h>
#include <inttypes.h>
+#include "../../cryptonight.hpp"
-#define MEMORY 2097152
typedef struct {
uint8_t hash_state[224]; // Need only 200, explicit align
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 942d511..2a6a769 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -287,7 +287,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
_mm_store_si128(output + 11, xout7);
}
-template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -308,36 +308,36 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
for(size_t i = 0; i < ITERATIONS; i++)
{
__m128i cx;
- cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
+ cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]);
if(SOFT_AES)
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
- _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
+ _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
if(PREFETCH)
- _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
- ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
+ cl = ((uint64_t*)&l0[idx0 & MASK])[0];
+ ch = ((uint64_t*)&l0[idx0 & MASK])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
+ ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
if(PREFETCH)
- _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
}
// Optim - 90% time boundary
@@ -352,7 +352,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -381,66 +381,66 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
for (size_t i = 0; i < ITERATIONS; i++)
{
__m128i cx;
- cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
+ cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]);
if(SOFT_AES)
cx = soft_aesenc(cx, _mm_set_epi64x(axh0, axl0));
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
- _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
+ _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
if(PREFETCH)
- _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
- cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]);
+ cx = _mm_load_si128((__m128i *)&l1[idx1 & MASK]);
if(SOFT_AES)
cx = soft_aesenc(cx, _mm_set_epi64x(axh1, axl1));
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
- _mm_store_si128((__m128i *)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx));
+ _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
idx1 = _mm_cvtsi128_si64(cx);
bx1 = cx;
if(PREFETCH)
- _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
- ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
+ cl = ((uint64_t*)&l0[idx0 & MASK])[0];
+ ch = ((uint64_t*)&l0[idx0 & MASK])[1];
lo = _umul128(idx0, cl, &hi);
axl0 += hi;
axh0 += lo;
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = axl0;
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = axh0;
+ ((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
+ ((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
axh0 ^= ch;
axl0 ^= cl;
idx0 = axl0;
if(PREFETCH)
- _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
- cl = ((uint64_t*)&l1[idx1 & 0x1FFFF0])[0];
- ch = ((uint64_t*)&l1[idx1 & 0x1FFFF0])[1];
+ cl = ((uint64_t*)&l1[idx1 & MASK])[0];
+ ch = ((uint64_t*)&l1[idx1 & MASK])[1];
lo = _umul128(idx1, cl, &hi);
axl1 += hi;
axh1 += lo;
- ((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = axl1;
- ((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = axh1;
+ ((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
+ ((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
axh1 ^= ch;
axl1 ^= cl;
idx1 = axl1;
if(PREFETCH)
- _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0);
+ _mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
}
// Optim - 90% time boundary
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
index 0690415..70ad27c 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
+++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
@@ -30,6 +30,8 @@ extern "C"
}
#include "cryptonight.h"
#include "cryptonight_aesni.h"
+#include "../../../jconf.hpp"
+#include "../../cryptonight.hpp"
#include <stdio.h>
#include <stdlib.h>
@@ -194,12 +196,21 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
{
+ size_t hashMemSize;
+ if(::jconf::inst()->IsCurrencyXMR())
+ {
+ hashMemSize = XMR_MEMORY;
+ }
+ else
+ {
+ hashMemSize = AEON_MEMORY;
+ }
cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096);
if(use_fast_mem == 0)
{
// use 2MiB aligned memory
- ptr->long_state = (uint8_t*)_mm_malloc(MEMORY, 2*1024*1024);
+ ptr->long_state = (uint8_t*)_mm_malloc(hashMemSize, hashMemSize);
ptr->ctx_info[0] = 0;
ptr->ctx_info[1] = 0;
return ptr;
@@ -208,7 +219,7 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
#ifdef _WIN32
SIZE_T iLargePageMin = GetLargePageMinimum();
- if(MEMORY > iLargePageMin)
+ if(hashMemSize > iLargePageMin)
iLargePageMin *= 2;
ptr->long_state = (uint8_t*)VirtualAlloc(NULL, iLargePageMin,
@@ -231,13 +242,13 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
#else
#if defined(__APPLE__)
- ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE,
+ ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
- ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE,
+ ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
#else
- ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE,
+ ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0);
#endif
@@ -250,11 +261,11 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
ptr->ctx_info[0] = 1;
- if(madvise(ptr->long_state, MEMORY, MADV_RANDOM|MADV_WILLNEED) != 0)
+ if(madvise(ptr->long_state, hashMemSize, MADV_RANDOM|MADV_WILLNEED) != 0)
msg->warning = "madvise failed";
ptr->ctx_info[1] = 0;
- if(use_mlock != 0 && mlock(ptr->long_state, MEMORY) != 0)
+ if(use_mlock != 0 && mlock(ptr->long_state, hashMemSize) != 0)
msg->warning = "mlock failed";
else
ptr->ctx_info[1] = 1;
@@ -265,14 +276,23 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
void cryptonight_free_ctx(cryptonight_ctx* ctx)
{
+ size_t hashMemSize;
+ if(::jconf::inst()->IsCurrencyXMR())
+ {
+ hashMemSize = XMR_MEMORY;
+ }
+ else
+ {
+ hashMemSize = AEON_MEMORY;
+ }
if(ctx->ctx_info[0] != 0)
{
#ifdef _WIN32
VirtualFree(ctx->long_state, 0, MEM_RELEASE);
#else
if(ctx->ctx_info[1] != 0)
- munlock(ctx->long_state, MEMORY);
- munmap(ctx->long_state, MEMORY);
+ munlock(ctx->long_state, hashMemSize);
+ munmap(ctx->long_state, hashMemSize);
#endif // _WIN32
}
else
OpenPOWER on IntegriCloud