summary | refs | log | tree | commit | diff | stats
path: root/xmrstak/backend/cpu
diff options
context:
space:
mode:
author: fireice-uk <fireice-uk@users.noreply.github.com> 2018-03-25 22:40:01 +0100
committer: GitHub <noreply@github.com> 2018-03-25 22:40:01 +0100
commit: a036cd81592e3b3de804ba88bb8f94729ab60b7d (patch)
tree: f835fc9823d80e43bdbb65023b2aed5718ee1627 /xmrstak/backend/cpu
parent: 2ae7260b90fe3dbe835ba2489519510f0e57d770 (diff)
parent: 09a5dcce2c51d87d77244970d2c09bea3207da7a (diff)
download: xmr-stak-a036cd81592e3b3de804ba88bb8f94729ab60b7d.zip
xmr-stak-a036cd81592e3b3de804ba88bb8f94729ab60b7d.tar.gz
Merge pull request #1208 from fireice-uk/dev2.3.0
release 2.3.0
Diffstat (limited to 'xmrstak/backend/cpu')
-rw-r--r-- xmrstak/backend/cpu/autoAdjust.hpp 33
-rw-r--r-- xmrstak/backend/cpu/autoAdjustHwloc.hpp 12
-rw-r--r-- xmrstak/backend/cpu/config.tpl 5
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight.h 2
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight_aesni.h 453
-rw-r--r-- xmrstak/backend/cpu/crypto/cryptonight_common.cpp 35
-rw-r--r-- xmrstak/backend/cpu/minethd.cpp 289
-rw-r--r-- xmrstak/backend/cpu/minethd.hpp 5
8 files changed, 606 insertions, 228 deletions
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp
index 7bdb14e..db805ec 100644
--- a/xmrstak/backend/cpu/autoAdjust.hpp
+++ b/xmrstak/backend/cpu/autoAdjust.hpp
@@ -33,25 +33,21 @@ class autoAdjust
{
public:
- size_t hashMemSize;
- size_t halfHashMemSize;
-
- autoAdjust()
+ bool printConfig()
{
+ size_t hashMemSizeKB;
+ size_t halfHashMemSizeKB;
+
if(::jconf::inst()->IsCurrencyMonero())
{
- hashMemSize = MONERO_MEMORY;
- halfHashMemSize = hashMemSize / 2u;
+ hashMemSizeKB = MONERO_MEMORY / 1024u;
+ halfHashMemSizeKB = hashMemSizeKB / 2u;
}
else
{
- hashMemSize = AEON_MEMORY;
- halfHashMemSize = hashMemSize / 2u;
+ hashMemSizeKB = AEON_MEMORY / 1024u;
+ halfHashMemSizeKB = hashMemSizeKB / 2u;
}
- }
-
- bool printConfig()
- {
configEditor configTpl{};
@@ -63,9 +59,10 @@ public:
std::string conf;
- if(!detectL3Size() || L3KB_size < halfHashMemSize || L3KB_size > (halfHashMemSize * 100u))
+
+ if(!detectL3Size() || L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048u))
{
- if(L3KB_size < halfHashMemSize || L3KB_size > (halfHashMemSize * 100))
+ if(L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048))
printer::inst()->print_msg(L0, "Autoconf failed: L3 size sanity check failed - %u KB.", L3KB_size);
conf += std::string(" { \"low_power_mode\" : false, \"no_prefetch\" : true, \"affine_to_cpu\" : false },\n");
@@ -88,7 +85,7 @@ public:
if(L3KB_size <= 0)
break;
- double_mode = L3KB_size / hashMemSize > (int32_t)(corecnt-i);
+ double_mode = L3KB_size / hashMemSizeKB > (int32_t)(corecnt-i);
conf += std::string(" { \"low_power_mode\" : ");
conf += std::string(double_mode ? "true" : "false");
@@ -107,9 +104,9 @@ public:
aff_id++;
if(double_mode)
- L3KB_size -= hashMemSize * 2u;
+ L3KB_size -= hashMemSizeKB * 2u;
else
- L3KB_size -= hashMemSize;
+ L3KB_size -= hashMemSizeKB;
}
}
@@ -142,7 +139,7 @@ private:
}
L3KB_size = ((get_masked(cpu_info[1], 31, 22) + 1) * (get_masked(cpu_info[1], 21, 12) + 1) *
- (get_masked(cpu_info[1], 11, 0) + 1) * (cpu_info[2] + 1)) / halfHashMemSize;
+ (get_masked(cpu_info[1], 11, 0) + 1) * (cpu_info[2] + 1)) / 1024;
return true;
}
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
index ddeb89b..568abb5 100644
--- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -28,16 +28,8 @@ public:
autoAdjust()
{
- if(::jconf::inst()->IsCurrencyMonero())
- {
- hashMemSize = MONERO_MEMORY;
- halfHashMemSize = hashMemSize / 2u;
- }
- else
- {
- hashMemSize = AEON_MEMORY;
- halfHashMemSize = hashMemSize / 2u;
- }
+ hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+ halfHashMemSize = hashMemSize / 2u;
}
bool printConfig()
diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl
index b21a22d..cb4b950 100644
--- a/xmrstak/backend/cpu/config.tpl
+++ b/xmrstak/backend/cpu/config.tpl
@@ -2,7 +2,7 @@ R"===(
/*
* Thread configuration for each thread. Make sure it matches the number above.
* low_power_mode - This can either be a boolean (true or false), or a number between 1 to 5. When set to true,
- this mode will double the cache usage, and double the single thread performance. It will
+ * this mode will double the cache usage, and double the single thread performance. It will
* consume much less power (as less cores are working), but will max out at around 80-85% of
* the maximum performance. When set to a number N greater than 1, this mode will increase the
* cache usage and single thread performance by N times.
@@ -24,6 +24,9 @@ R"===(
* { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 },
* { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 },
* ],
+ * If you do not wish to mine with your CPU(s) then use:
+ * "cpu_threads_conf" :
+ * null,
*/
"cpu_threads_conf" :
diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h
index 631c39a..5c9a733 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight.h
@@ -7,8 +7,6 @@ extern "C" {
#include <stddef.h>
#include <inttypes.h>
-#include "xmrstak/backend/cryptonight.hpp"
-
typedef struct {
uint8_t hash_state[224]; // Need only 200, explicit align
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 9b6e1dc..85373e8 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -16,6 +16,7 @@
#pragma once
#include "cryptonight.h"
+#include "xmrstak/backend/cryptonight.hpp"
#include <memory.h>
#include <stdio.h>
@@ -148,7 +149,20 @@ static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i
*x7 = soft_aesenc(*x7, key);
}
-template<size_t MEM, bool SOFT_AES, bool PREFETCH>
+inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
+{
+ __m128i tmp0 = x0;
+ x0 = _mm_xor_si128(x0, x1);
+ x1 = _mm_xor_si128(x1, x2);
+ x2 = _mm_xor_si128(x2, x3);
+ x3 = _mm_xor_si128(x3, x4);
+ x4 = _mm_xor_si128(x4, x5);
+ x5 = _mm_xor_si128(x5, x6);
+ x6 = _mm_xor_si128(x6, x7);
+ x7 = _mm_xor_si128(x7, tmp0);
+}
+
+template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO>
void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
@@ -166,6 +180,40 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output)
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
+ if(ALGO == cryptonight_heavy)
+ {
+ for(size_t i=0; i < 16; i++)
+ {
+ if(SOFT_AES)
+ {
+ soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ }
+ else
+ {
+ aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+ }
+ mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+ }
+ }
+
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
{
if(SOFT_AES)
@@ -213,7 +261,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output)
}
}
-template<size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO>
void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
@@ -275,6 +323,93 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
+
+ if(ALGO == cryptonight_heavy)
+ mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+ }
+
+ if(ALGO == cryptonight_heavy)
+ {
+ for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+ {
+ if(PREFETCH)
+ _mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA);
+
+ xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
+ xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
+ xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
+ xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
+
+ if(PREFETCH)
+ _mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA);
+
+ xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
+ xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
+ xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
+ xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
+
+ if(SOFT_AES)
+ {
+ soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ }
+ else
+ {
+ aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ }
+
+ if(ALGO == cryptonight_heavy)
+ mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+ }
+
+ for(size_t i=0; i < 16; i++)
+ {
+ if(SOFT_AES)
+ {
+ soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ }
+ else
+ {
+ aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+ }
+
+ mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+ }
}
_mm_store_si128(output + 4, xout0);
@@ -287,13 +422,45 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
_mm_store_si128(output + 11, xout7);
}
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+ mem_out[0] = _mm_cvtsi128_si64(tmp);
+
+ tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+ uint64_t vh = _mm_cvtsi128_si64(tmp);
+
+ uint8_t x = vh >> 24;
+ static const uint16_t table = 0x7531;
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+}
+
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
+ constexpr size_t MASK = cn_select_mask<ALGO>();
+ constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+ constexpr size_t MEM = cn_select_memory<ALGO>();
+
+ if(ALGO == cryptonight_monero && len < 43)
+ {
+ memset(output, 0, 32);
+ return;
+ }
+
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
+ uint64_t monero_const;
+ if(ALGO == cryptonight_monero)
+ {
+ monero_const = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
+ monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24);
+ }
+
// Optim - 99% time boundary
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);
uint8_t* l0 = ctx0->long_state;
uint64_t* h0 = (uint64_t*)ctx0->hash_state;
@@ -315,12 +482,16 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
- _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ if(ALGO == cryptonight_monero)
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ else
+ _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+
idx0 = _mm_cvtsi128_si64(cx);
- bx0 = cx;
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
+ bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*)&l0[idx0 & MASK])[0];
@@ -329,19 +500,33 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
lo = _umul128(idx0, cl, &hi);
al0 += hi;
- ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
- ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
- ah0 ^= ch;
al0 ^= cl;
+ if(PREFETCH)
+ _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0);
+ ah0 += lo;
+
+ if(ALGO == cryptonight_monero)
+ ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+ else
+ ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
+ ah0 ^= ch;
+
idx0 = al0;
- if(PREFETCH)
- _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
+ if(ALGO == cryptonight_heavy)
+ {
+ int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
+ int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
+ int64_t q = n / (d | 0x5);
+
+ ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
+ idx0 = d ^ q;
+ }
}
// Optim - 90% time boundary
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
// Optim - 99% time boundary
@@ -352,15 +537,34 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
{
+ constexpr size_t MASK = cn_select_mask<ALGO>();
+ constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+ constexpr size_t MEM = cn_select_memory<ALGO>();
+
+ if(ALGO == cryptonight_monero && len < 43)
+ {
+ memset(output, 0, 64);
+ return;
+ }
+
keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200);
keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200);
+ uint64_t monero_const_0, monero_const_1;
+ if(ALGO == cryptonight_monero)
+ {
+ monero_const_0 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
+ monero_const_0 ^= *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24);
+ monero_const_1 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + len + 35);
+ monero_const_1 ^= *(reinterpret_cast<const uint64_t*>(ctx[1]->hash_state) + 24);
+ }
+
// Optim - 99% time boundary
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state);
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state);
uint8_t* l0 = ctx[0]->long_state;
uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
@@ -388,7 +592,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
- _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ if(ALGO == cryptonight_monero)
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+ else
+ _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
@@ -402,7 +610,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
- _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+ if(ALGO == cryptonight_monero)
+ cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+ else
+ _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+
idx1 = _mm_cvtsi128_si64(cx);
bx1 = cx;
@@ -418,11 +630,26 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
axl0 += hi;
axh0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
- ((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
+
+ if(ALGO == cryptonight_monero)
+ ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+ else
+ ((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
+
axh0 ^= ch;
axl0 ^= cl;
idx0 = axl0;
+ if(ALGO == cryptonight_heavy)
+ {
+ int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
+ int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
+ int64_t q = n / (d | 0x5);
+
+ ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
+ idx0 = d ^ q;
+ }
+
if(PREFETCH)
_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
@@ -434,18 +661,33 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
axl1 += hi;
axh1 += lo;
((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
- ((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
+
+ if(ALGO == cryptonight_monero)
+ ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+ else
+ ((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
+
axh1 ^= ch;
axl1 ^= cl;
idx1 = axl1;
+ if(ALGO == cryptonight_heavy)
+ {
+ int64_t n = ((int64_t*)&l1[idx1 & MASK])[0];
+ int32_t d = ((int32_t*)&l1[idx1 & MASK])[2];
+ int64_t q = n / (d | 0x5);
+
+ ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
+ idx1 = d ^ q;
+ }
+
if(PREFETCH)
_mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
}
// Optim - 90% time boundary
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state);
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state);
// Optim - 99% time boundary
@@ -456,12 +698,10 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
}
#define CN_STEP1(a, b, c, l, ptr, idx) \
- a = _mm_xor_si128(a, c); \
- idx = _mm_cvtsi128_si64(a); \
ptr = (__m128i *)&l[idx & MASK]; \
if(PREFETCH) \
_mm_prefetch((const char*)ptr, _MM_HINT_T0); \
- c = _mm_load_si128(ptr)
+ c = _mm_load_si128(ptr);
#define CN_STEP2(a, b, c, l, ptr, idx) \
if(SOFT_AES) \
@@ -469,30 +709,64 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
else \
c = _mm_aesenc_si128(c, a); \
b = _mm_xor_si128(b, c); \
- _mm_store_si128(ptr, b)
+ if(ALGO == cryptonight_monero) \
+ cryptonight_monero_tweak((uint64_t*)ptr, b); \
+ else \
+ _mm_store_si128(ptr, b);\
#define CN_STEP3(a, b, c, l, ptr, idx) \
idx = _mm_cvtsi128_si64(c); \
ptr = (__m128i *)&l[idx & MASK]; \
if(PREFETCH) \
_mm_prefetch((const char*)ptr, _MM_HINT_T0); \
- b = _mm_load_si128(ptr)
+ b = _mm_load_si128(ptr);
-#define CN_STEP4(a, b, c, l, ptr, idx) \
+#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
- _mm_store_si128(ptr, a)
+ if(ALGO == cryptonight_monero) \
+ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
+ else \
+ _mm_store_si128(ptr, a);\
+ a = _mm_xor_si128(a, b); \
+ idx = _mm_cvtsi128_si64(a); \
+ if(ALGO == cryptonight_heavy) \
+ { \
+ int64_t n = ((int64_t*)&l[idx & MASK])[0]; \
+ int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
+ int64_t q = n / (d | 0x5); \
+ ((int64_t*)&l[idx & MASK])[0] = n ^ q; \
+ idx = d ^ q; \
+ }
+
+#define CONST_INIT(ctx, n) \
+ __m128i mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + n * len + 35) ^ \
+ *(reinterpret_cast<const uint64_t*>((ctx)->hash_state) + 24), 0);
// This lovelier creation will do 3 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
void cryptonight_triple_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
{
+ constexpr size_t MASK = cn_select_mask<ALGO>();
+ constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+ constexpr size_t MEM = cn_select_memory<ALGO>();
+
+ if(ALGO == cryptonight_monero && len < 43)
+ {
+ memset(output, 0, 32 * 3);
+ return;
+ }
+
for (size_t i = 0; i < 3; i++)
{
keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
}
+ CONST_INIT(ctx[0], 0);
+ CONST_INIT(ctx[1], 1);
+ CONST_INIT(ctx[2], 2);
+
uint8_t* l0 = ctx[0]->long_state;
uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
uint8_t* l1 = ctx[1]->long_state;
@@ -510,9 +784,14 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
+ uint64_t idx0, idx1, idx2;
+ idx0 = _mm_cvtsi128_si64(ax0);
+ idx1 = _mm_cvtsi128_si64(ax1);
+ idx2 = _mm_cvtsi128_si64(ax2);
+
for (size_t i = 0; i < ITERATIONS/2; i++)
{
- uint64_t idx0, idx1, idx2, hi, lo;
+ uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2;
// EVEN ROUND
@@ -528,9 +807,9 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
- CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
- CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
- CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
+ CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -545,29 +824,44 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
- CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
- CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
- CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
+ CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
}
for (size_t i = 0; i < 3; i++)
{
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
keccakf((uint64_t*)ctx[i]->hash_state, 24);
extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
}
}
// This even lovelier creation will do 4 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
{
+ constexpr size_t MASK = cn_select_mask<ALGO>();
+ constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+ constexpr size_t MEM = cn_select_memory<ALGO>();
+
+ if(ALGO == cryptonight_monero && len < 43)
+ {
+ memset(output, 0, 32 * 4);
+ return;
+ }
+
for (size_t i = 0; i < 4; i++)
{
keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
}
+ CONST_INIT(ctx[0], 0);
+ CONST_INIT(ctx[1], 1);
+ CONST_INIT(ctx[2], 2);
+ CONST_INIT(ctx[3], 3);
+
uint8_t* l0 = ctx[0]->long_state;
uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
uint8_t* l1 = ctx[1]->long_state;
@@ -589,10 +883,16 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
-
+
+ uint64_t idx0, idx1, idx2, idx3;
+ idx0 = _mm_cvtsi128_si64(ax0);
+ idx1 = _mm_cvtsi128_si64(ax1);
+ idx2 = _mm_cvtsi128_si64(ax2);
+ idx3 = _mm_cvtsi128_si64(ax3);
+
for (size_t i = 0; i < ITERATIONS/2; i++)
{
- uint64_t idx0, idx1, idx2, idx3, hi, lo;
+ uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3;
// EVEN ROUND
@@ -611,10 +911,10 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
- CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
- CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
- CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
- CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3);
+ CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+ CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -632,30 +932,46 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
- CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
- CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
- CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
- CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3);
+ CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+ CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
}
for (size_t i = 0; i < 4; i++)
{
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
keccakf((uint64_t*)ctx[i]->hash_state, 24);
extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
}
}
// This most lovely creation will do 5 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
void cryptonight_penta_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
{
+ constexpr size_t MASK = cn_select_mask<ALGO>();
+ constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+ constexpr size_t MEM = cn_select_memory<ALGO>();
+
+ if(ALGO == cryptonight_monero && len < 43)
+ {
+ memset(output, 0, 32 * 5);
+ return;
+ }
+
for (size_t i = 0; i < 5; i++)
{
keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
- cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+ cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
}
+ CONST_INIT(ctx[0], 0);
+ CONST_INIT(ctx[1], 1);
+ CONST_INIT(ctx[2], 2);
+ CONST_INIT(ctx[3], 3);
+ CONST_INIT(ctx[4], 4);
+
uint8_t* l0 = ctx[0]->long_state;
uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
uint8_t* l1 = ctx[1]->long_state;
@@ -683,9 +999,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
__m128i cx3 = _mm_set_epi64x(0, 0);
__m128i cx4 = _mm_set_epi64x(0, 0);
+ uint64_t idx0, idx1, idx2, idx3, idx4;
+ idx0 = _mm_cvtsi128_si64(ax0);
+ idx1 = _mm_cvtsi128_si64(ax1);
+ idx2 = _mm_cvtsi128_si64(ax2);
+ idx3 = _mm_cvtsi128_si64(ax3);
+ idx4 = _mm_cvtsi128_si64(ax4);
+
for (size_t i = 0; i < ITERATIONS/2; i++)
{
- uint64_t idx0, idx1, idx2, idx3, idx4, hi, lo;
+ uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
// EVEN ROUND
@@ -707,11 +1030,11 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
- CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
- CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
- CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
- CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3);
- CN_STEP4(ax4, bx4, cx4, l4, ptr4, idx4);
+ CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+ CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+ CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -732,16 +1055,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
- CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
- CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
- CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
- CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3);
- CN_STEP4(ax4, cx4, bx4, l4, ptr4, idx4);
+ CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+ CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+ CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+ CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+ CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
}
for (size_t i = 0; i < 5; i++)
{
- cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+ cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
keccakf((uint64_t*)ctx[i]->hash_state, 24);
extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
}
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
index 8b2207d..17fa24b 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
+++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
@@ -28,9 +28,10 @@ extern "C"
#include "c_jh.h"
#include "c_skein.h"
}
+#include "xmrstak/backend/cryptonight.hpp"
#include "cryptonight.h"
#include "cryptonight_aesni.h"
-#include "xmrstak/backend/cryptonight.hpp"
+#include "xmrstak/misc/console.hpp"
#include "xmrstak/jconf.hpp"
#include <stdio.h>
#include <stdlib.h>
@@ -73,6 +74,8 @@ void do_skein_hash(const void* input, size_t len, char* output) {
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
#ifdef _WIN32
+#include "xmrstak/misc/uac.hpp"
+
BOOL bRebootDesirable = FALSE; //If VirtualAlloc fails, suggest a reboot
BOOL AddPrivilege(TCHAR* pszPrivilege)
@@ -176,13 +179,16 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
if(AddPrivilege(TEXT("SeLockMemoryPrivilege")) == 0)
{
+ printer::inst()->print_msg(L0, "Elevating because we need to set up fast memory privileges.");
+ RequestElevation();
+
if(AddLargePageRights())
{
msg->warning = "Added SeLockMemoryPrivilege to the current account. You need to reboot for it to work";
bRebootDesirable = TRUE;
}
else
- msg->warning = "Obtaning SeLockMemoryPrivilege failed.";
+ msg->warning = "Obtaining SeLockMemoryPrivilege failed.";
return 0;
}
@@ -196,15 +202,8 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
{
- size_t hashMemSize;
- if(::jconf::inst()->IsCurrencyMonero())
- {
- hashMemSize = MONERO_MEMORY;
- }
- else
- {
- hashMemSize = AEON_MEMORY;
- }
+ size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+
cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096);
if(use_fast_mem == 0)
@@ -247,6 +246,9 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
#elif defined(__FreeBSD__)
ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
+#elif defined(__OpenBSD__)
+ ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
#else
ptr->long_state = (uint8_t*)mmap(0, hashMemSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0);
@@ -276,15 +278,8 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
void cryptonight_free_ctx(cryptonight_ctx* ctx)
{
- size_t hashMemSize;
- if(::jconf::inst()->IsCurrencyMonero())
- {
- hashMemSize = MONERO_MEMORY;
- }
- else
- {
- hashMemSize = AEON_MEMORY;
- }
+ size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+
if(ctx->ctx_info[0] != 0)
{
#ifdef _WIN32
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 143b66f..e263aca 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -73,7 +73,16 @@ namespace cpu
bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id)
{
#if defined(_WIN32)
- return SetThreadAffinityMask(h, 1ULL << cpu_id) != 0;
+ // we can only pin up to 64 threads
+ if(cpu_id < 64)
+ {
+ return SetThreadAffinityMask(h, 1ULL << cpu_id) != 0;
+ }
+ else
+ {
+ printer::inst()->print_msg(L0, "WARNING: Windows supports only affinity up to 63.");
+ return false;
+ }
#elif defined(__APPLE__)
thread_port_t mach_thread;
thread_affinity_policy_data_t policy = { static_cast<integer_t>(cpu_id) };
@@ -84,6 +93,8 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id
CPU_ZERO(&mn);
CPU_SET(cpu_id, &mn);
return pthread_setaffinity_np(h, sizeof(cpuset_t), &mn) == 0;
+#elif defined(__OpenBSD__)
+ printer::inst()->print_msg(L0,"WARNING: thread pinning is not supported under OPENBSD.");
#else
cpu_set_t mn;
CPU_ZERO(&mn);
@@ -220,45 +231,44 @@ bool minethd::self_test()
bool bResult = true;
- bool mineMonero = ::jconf::inst()->IsCurrencyMonero();
- if(mineMonero)
+ if(::jconf::inst()->GetMiningAlgo() == cryptonight)
{
unsigned char out[32 * MAX_N];
cn_hash_fun hashf;
cn_hash_fun_multi hashf_multi;
- hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+ hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
hashf("This is a test", 14, out, ctx[0]);
bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
- hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, mineMonero);
+ hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
hashf("This is a test", 14, out, ctx[0]);
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
- hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+ hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
- hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, mineMonero);
+ hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
- hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+ hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx);
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0;
- hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+ hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx);
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0;
- hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+ hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx);
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
@@ -266,6 +276,12 @@ bool minethd::self_test()
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0;
}
+ else if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
+ {
+ }
+ else if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+ {
+ }
for (int i = 0; i < MAX_N; i++)
cryptonight_free_ctx(ctx[i]);
@@ -307,7 +323,7 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
if(cfg.iCpuAff >= 0)
{
#if defined(__APPLE__)
- printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
+ printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
#endif
printer::inst()->print_msg(L1, "Starting %dx thread, affinity: %d.", cfg.iMultiway, (int)cfg.iCpuAff);
@@ -329,48 +345,56 @@ void minethd::consume_work()
globalStates::inst().inst().iConsumeCnt++;
}
-minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero)
+minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
{
// We have two independent flag bits in the functions
// therefore we will build a binary digit and select the
// function as a two digit binary
- // Digit order SOFT_AES, NO_PREFETCH, MINER_ALGO
+
+ uint8_t algv;
+ switch(algo)
+ {
+ case cryptonight:
+ algv = 2;
+ break;
+ case cryptonight_lite:
+ algv = 1;
+ break;
+ case cryptonight_monero:
+ algv = 0;
+ break;
+ case cryptonight_heavy:
+ algv = 3;
+ break;
+ default:
+ algv = 2;
+ break;
+ }
static const cn_hash_fun func_table[] = {
- /* there will be 8 function entries if `CONF_NO_MONERO` and `CONF_NO_AEON`
- * is not defined. If one is defined there will be 4 entries.
- */
-#ifndef CONF_NO_MONERO
- cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
- cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
- cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
- cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>
-#endif
-#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO))
- // comma will be added only if Monero and Aeon is build
- ,
-#endif
-#ifndef CONF_NO_AEON
- cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
- cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
- cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
- cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>
-#endif
+ cryptonight_hash<cryptonight_monero, false, false>,
+ cryptonight_hash<cryptonight_monero, true, false>,
+ cryptonight_hash<cryptonight_monero, false, true>,
+ cryptonight_hash<cryptonight_monero, true, true>,
+ cryptonight_hash<cryptonight_lite, false, false>,
+ cryptonight_hash<cryptonight_lite, true, false>,
+ cryptonight_hash<cryptonight_lite, false, true>,
+ cryptonight_hash<cryptonight_lite, true, true>,
+ cryptonight_hash<cryptonight, false, false>,
+ cryptonight_hash<cryptonight, true, false>,
+ cryptonight_hash<cryptonight, false, true>,
+ cryptonight_hash<cryptonight, true, true>,
+ cryptonight_hash<cryptonight_heavy, false, false>,
+ cryptonight_hash<cryptonight_heavy, true, false>,
+ cryptonight_hash<cryptonight_heavy, false, true>,
+ cryptonight_hash<cryptonight_heavy, true, true>
};
- std::bitset<3> digit;
- digit.set(0, !bNoPrefetch);
- digit.set(1, !bHaveAes);
-
- // define aeon settings
-#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO)
- // ignore 3rd bit if only one currency is active
- digit.set(2, 0);
-#else
- digit.set(2, !mineMonero);
-#endif
+ std::bitset<2> digit;
+ digit.set(0, !bHaveAes);
+ digit.set(1, !bNoPrefetch);
- return func_table[digit.to_ulong()];
+ return func_table[ algv << 2 | digit.to_ulong() ];
}
void minethd::work_main()
@@ -390,7 +414,7 @@ void minethd::work_main()
uint32_t* piNonce;
job_result result;
- hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero());
+ hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo());
ctx = minethd_alloc_ctx();
piHashVal = (uint64_t*)(result.bResult + 24);
@@ -423,6 +447,22 @@ void minethd::work_main()
if(oWork.bNiceHash)
result.iNonce = *piNonce;
+ if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+ {
+ if(oWork.bWorkBlob[0] >= 7)
+ hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero);
+ else
+ hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+ }
+
+ if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy)
+ {
+ if(oWork.bWorkBlob[0] >= 3)
+ hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy);
+ else
+ hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+ }
+
while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
if ((iCount++ & 0xF) == 0) //Store stats every 16 hashes
@@ -437,12 +477,13 @@ void minethd::work_main()
globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
}
- *piNonce = ++result.iNonce;
+ *piNonce = result.iNonce;
hash_fun(oWork.bWorkBlob, oWork.iWorkSize, result.bResult, ctx);
if (*piHashVal < oWork.iTarget)
executor::inst()->push_event(ex_event(result, oWork.iPoolId));
+ result.iNonce++;
std::this_thread::yield();
}
@@ -453,93 +494,105 @@ void minethd::work_main()
cryptonight_free_ctx(ctx);
}
-minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero)
+minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
{
// We have two independent flag bits in the functions
// therefore we will build a binary digit and select the
// function as a two digit binary
- // Digit order SOFT_AES, NO_PREFETCH
+
+ uint8_t algv;
+ switch(algo)
+ {
+ case cryptonight:
+ algv = 2;
+ break;
+ case cryptonight_lite:
+ algv = 1;
+ break;
+ case cryptonight_monero:
+ algv = 0;
+ break;
+ default:
+ algv = 2;
+ break;
+ }
static const cn_hash_fun_multi func_table[] = {
- /* there will be 8*(MAX_N-1) function entries if `CONF_NO_MONERO` and `CONF_NO_AEON`
- * is not defined. If one is defined there will be 4*(MAX_N-1) entries.
- */
-#ifndef CONF_NO_MONERO
- cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
- cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
- cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
- cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
- cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
- cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
- cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
- cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
- cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
- cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
- cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
- cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
- cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
- cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
- cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
- cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>
-#endif
-#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO))
- // comma will be added only if Monero and Aeon is build
- ,
-#endif
-#ifndef CONF_NO_AEON
- cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
- cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
- cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
- cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
- cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
- cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
- cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
- cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
- cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
- cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
- cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
- cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
- cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
- cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
- cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
- cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>
-#endif
+ cryptonight_double_hash<cryptonight_monero, false, false>,
+ cryptonight_double_hash<cryptonight_monero, true, false>,
+ cryptonight_double_hash<cryptonight_monero, false, true>,
+ cryptonight_double_hash<cryptonight_monero, true, true>,
+ cryptonight_triple_hash<cryptonight_monero, false, false>,
+ cryptonight_triple_hash<cryptonight_monero, true, false>,
+ cryptonight_triple_hash<cryptonight_monero, false, true>,
+ cryptonight_triple_hash<cryptonight_monero, true, true>,
+ cryptonight_quad_hash<cryptonight_monero, false, false>,
+ cryptonight_quad_hash<cryptonight_monero, true, false>,
+ cryptonight_quad_hash<cryptonight_monero, false, true>,
+ cryptonight_quad_hash<cryptonight_monero, true, true>,
+ cryptonight_penta_hash<cryptonight_monero, false, false>,
+ cryptonight_penta_hash<cryptonight_monero, true, false>,
+ cryptonight_penta_hash<cryptonight_monero, false, true>,
+ cryptonight_penta_hash<cryptonight_monero, true, true>,
+ cryptonight_double_hash<cryptonight_lite, false, false>,
+ cryptonight_double_hash<cryptonight_lite, true, false>,
+ cryptonight_double_hash<cryptonight_lite, false, true>,
+ cryptonight_double_hash<cryptonight_lite, true, true>,
+ cryptonight_triple_hash<cryptonight_lite, false, false>,
+ cryptonight_triple_hash<cryptonight_lite, true, false>,
+ cryptonight_triple_hash<cryptonight_lite, false, true>,
+ cryptonight_triple_hash<cryptonight_lite, true, true>,
+ cryptonight_quad_hash<cryptonight_lite, false, false>,
+ cryptonight_quad_hash<cryptonight_lite, true, false>,
+ cryptonight_quad_hash<cryptonight_lite, false, true>,
+ cryptonight_quad_hash<cryptonight_lite, true, true>,
+ cryptonight_penta_hash<cryptonight_lite, false, false>,
+ cryptonight_penta_hash<cryptonight_lite, true, false>,
+ cryptonight_penta_hash<cryptonight_lite, false, true>,
+ cryptonight_penta_hash<cryptonight_lite, true, true>,
+ cryptonight_double_hash<cryptonight, false, false>,
+ cryptonight_double_hash<cryptonight, true, false>,
+ cryptonight_double_hash<cryptonight, false, true>,
+ cryptonight_double_hash<cryptonight, true, true>,
+ cryptonight_triple_hash<cryptonight, false, false>,
+ cryptonight_triple_hash<cryptonight, true, false>,
+ cryptonight_triple_hash<cryptonight, false, true>,
+ cryptonight_triple_hash<cryptonight, true, true>,
+ cryptonight_quad_hash<cryptonight, false, false>,
+ cryptonight_quad_hash<cryptonight, true, false>,
+ cryptonight_quad_hash<cryptonight, false, true>,
+ cryptonight_quad_hash<cryptonight, true, true>,
+ cryptonight_penta_hash<cryptonight, false, false>,
+ cryptonight_penta_hash<cryptonight, true, false>,
+ cryptonight_penta_hash<cryptonight, false, true>,
+ cryptonight_penta_hash<cryptonight, true, true>
};
std::bitset<2> digit;
- digit.set(0, !bNoPrefetch);
- digit.set(1, !bHaveAes);
-
- // define aeon settings
-#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO)
- // ignore miner algo if only one currency is active
- size_t miner_algo_base = 0;
-#else
- size_t miner_algo_base = mineMonero ? 0 : 4*(MAX_N-1);
-#endif
-
- N = (N<2) ? 2 : (N>MAX_N) ? MAX_N : N;
- return func_table[miner_algo_base + 4*(N-2) + digit.to_ulong()];
+ digit.set(0, !bHaveAes);
+ digit.set(1, !bNoPrefetch);
+
+ return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()];
}
void minethd::double_work_main()
{
- multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+ multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
}
void minethd::triple_work_main()
{
- multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+ multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
}
void minethd::quad_work_main()
{
- multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+ multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
}
void minethd::penta_work_main()
{
- multiway_work_main<5>(func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+ multiway_work_main<5>(func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
}
template<size_t N>
@@ -609,6 +662,22 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi)
if(oWork.bNiceHash)
iNonce = *piNonce[0];
+ if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+ {
+ if(oWork.bWorkBlob[0] >= 7)
+ hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero);
+ else
+ hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+ }
+
+ if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy)
+ {
+ if(oWork.bWorkBlob[0] >= 3)
+ hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy);
+ else
+ hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+ }
+
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
if ((iCount++ & 0x7) == 0) //Store stats every 8*N hashes
@@ -626,7 +695,7 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi)
}
for (size_t i = 0; i < N; i++)
- *piNonce[i] = ++iNonce;
+ *piNonce[i] = iNonce++;
hash_fun_multi(bWorkBlob, oWork.iWorkSize, bHashOut, ctx);
@@ -634,7 +703,7 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi)
{
if (*piHashVal[i] < oWork.iTarget)
{
- executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce - N + 1 + i, bHashOut + 32 * i, iThreadNo), oWork.iPoolId));
+ executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce - N + i, bHashOut + 32 * i, iThreadNo), oWork.iPoolId));
}
}
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 0433d0d..ef1bbd2 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include "xmrstak/jconf.hpp"
#include "crypto/cryptonight.h"
#include "xmrstak/backend/miner_work.hpp"
#include "xmrstak/backend/iBackend.hpp"
@@ -23,14 +24,14 @@ public:
typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
- static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero);
+ static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id);
static cryptonight_ctx* minethd_alloc_ctx();
private:
typedef void (*cn_hash_fun_multi)(const void*, size_t, void*, cryptonight_ctx**);
- static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero);
+ static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity);
OpenPOWER on IntegriCloud