summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfireice-uk <fireice-uk@users.noreply.github.com>2017-04-26 12:29:43 +0100
committerfireice-uk <fireice-uk@users.noreply.github.com>2017-04-26 12:29:43 +0100
commitec1e41cd8c78492ac5c657d0434d84bd9c491fb5 (patch)
tree41180918ea6781795e7dd11dc225fc061e52377c
parentec789ab3d72e24d9cd526f38ef0240d392747976 (diff)
downloadxmr-stak-ec1e41cd8c78492ac5c657d0434d84bd9c491fb5.zip
xmr-stak-ec1e41cd8c78492ac5c657d0434d84bd9c491fb5.tar.gz
Remove mulx
-rw-r--r--crypto/cryptonight_aesni.h25
-rw-r--r--jconf.cpp8
-rw-r--r--jconf.h2
-rw-r--r--minethd.cpp74
-rw-r--r--minethd.h4
5 files changed, 39 insertions, 74 deletions
diff --git a/crypto/cryptonight_aesni.h b/crypto/cryptonight_aesni.h
index daf4037..6d990d3 100644
--- a/crypto/cryptonight_aesni.h
+++ b/crypto/cryptonight_aesni.h
@@ -27,12 +27,6 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
*hi = r >> 64;
return (uint64_t)r;
}
-
-__attribute__((target ("bmi2"))) static inline uint64_t _mulx_u64(uint64_t a, uint64_t b, uint64_t* hi)
-{
- return _mulx_u64((unsigned long long)a, (unsigned long long)b, (unsigned long long*)hi);
-}
-
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
#else
#include <intrin.h>
@@ -284,7 +278,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
_mm_store_si128(output + 11, xout7);
}
-template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
+template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -323,10 +317,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
- if(MULX)
- lo = _mulx_u64(idx0, cl, &hi);
- else
- lo = _umul128(idx0, cl, &hi);
+ lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
@@ -352,7 +343,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
+template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1)
{
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -410,10 +401,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
uint64_t hi, lo;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
- if(MULX)
- lo = _mulx_u64(idx0, _mm_cvtsi128_si64(cx), &hi);
- else
- lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
+ lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
ax0 = _mm_add_epi64(ax0, _mm_set_epi64x(lo, hi));
_mm_store_si128((__m128i*)&l0[idx0 & 0x1FFFF0], ax0);
@@ -425,10 +413,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]);
- if(MULX)
- lo = _mulx_u64(idx1, _mm_cvtsi128_si64(cx), &hi);
- else
- lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
+ lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
ax1 = _mm_add_epi64(ax1, _mm_set_epi64x(lo, hi));
_mm_store_si128((__m128i*)&l1[idx1 & 0x1FFFF0], ax1);
diff --git a/jconf.cpp b/jconf.cpp
index 82750cc..8ed1947 100644
--- a/jconf.cpp
+++ b/jconf.cpp
@@ -277,7 +277,6 @@ bool jconf::check_cpu_features()
{
constexpr int AESNI_BIT = 1 << 25;
constexpr int SSE2_BIT = 1 << 26;
- constexpr int BMI2_BIT = 1 << 8;
int32_t cpu_info[4];
bool bHaveSse2;
@@ -286,10 +285,6 @@ bool jconf::check_cpu_features()
bHaveAes = (cpu_info[2] & AESNI_BIT) != 0;
bHaveSse2 = (cpu_info[3] & SSE2_BIT) != 0;
- cpuid(7, 0, cpu_info);
-
- bHaveBmi2 = (cpu_info[1] & BMI2_BIT) != 0;
-
return bHaveSse2;
}
@@ -462,9 +457,6 @@ bool jconf::parse_config(const char* sFilename)
{
if(!bHaveAes)
printer::inst()->print_msg(L0, "Your CPU doesn't support hardware AES. Don't expect high hashrates.");
-
- if(bHaveBmi2)
- printer::inst()->print_msg(L0, "CPU supports BMI2 instructions. Faster multiplication enabled.");
}
return true;
diff --git a/jconf.h b/jconf.h
index 08e2124..f932728 100644
--- a/jconf.h
+++ b/jconf.h
@@ -57,7 +57,6 @@ public:
bool PreferIpv4();
inline bool HaveHardwareAes() { return bHaveAes; }
- inline bool HaveMulx() { return bHaveBmi2; }
static void cpuid(uint32_t eax, int32_t ecx, int32_t val[4]);
@@ -70,5 +69,4 @@ private:
opaque_private* prv;
bool bHaveAes;
- bool bHaveBmi2;
};
diff --git a/minethd.cpp b/minethd.cpp
index 55bf9e7..199779e 100644
--- a/minethd.cpp
+++ b/minethd.cpp
@@ -251,20 +251,20 @@ bool minethd::self_test()
cn_hash_fun hashf;
cn_hash_fun_dbl hashdf;
- hashf = func_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx());
+ hashf = func_selector(jconf::inst()->HaveHardwareAes(), false);
hashf("This is a test", 14, out, ctx0);
bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
- hashf = func_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx());
+ hashf = func_selector(jconf::inst()->HaveHardwareAes(), true);
hashf("This is a test", 14, out, ctx0);
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
- hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx());
+ hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false);
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
- hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx());
+ hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true);
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
@@ -338,28 +338,23 @@ void minethd::consume_work()
iConsumeCnt++;
}
-minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx)
+minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch)
{
- // We have three independent flag bits in the functions
+ // We have two independent flag bits in the functions
// therefore we will build a binary digit and select the
- // function as a three digit binary
- // Digit order SOFT_AES, NO_PREFETCH, MULX
-
- static const cn_hash_fun func_table[8] = {
- cryptonight_hash<0x80000, MEMORY, false, false, false>,
- cryptonight_hash<0x80000, MEMORY, false, false, true>,
- cryptonight_hash<0x80000, MEMORY, false, true, false>,
- cryptonight_hash<0x80000, MEMORY, false, true, true>,
- cryptonight_hash<0x80000, MEMORY, true, false, false>,
- cryptonight_hash<0x80000, MEMORY, true, false, true>,
- cryptonight_hash<0x80000, MEMORY, true, true, false>,
- cryptonight_hash<0x80000, MEMORY, true, true, true>
+ // function as a two digit binary
+ // Digit order SOFT_AES, NO_PREFETCH
+
+ static const cn_hash_fun func_table[4] = {
+ cryptonight_hash<0x80000, MEMORY, false, false>,
+ cryptonight_hash<0x80000, MEMORY, false, true>,
+ cryptonight_hash<0x80000, MEMORY, true, false>,
+ cryptonight_hash<0x80000, MEMORY, true, true>
};
- std::bitset<3> digit;
- digit.set(0, bMulx);
- digit.set(1, !bNoPrefetch);
- digit.set(2, !bHaveAes);
+ std::bitset<2> digit;
+ digit.set(0, !bNoPrefetch);
+ digit.set(1, !bHaveAes);
return func_table[digit.to_ulong()];
}
@@ -373,7 +368,7 @@ void minethd::work_main()
uint32_t* piNonce;
job_result result;
- hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx());
+ hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch);
ctx = minethd_alloc_ctx();
piHashVal = (uint64_t*)(result.bResult + 24);
@@ -430,28 +425,23 @@ void minethd::work_main()
cryptonight_free_ctx(ctx);
}
-minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx)
+minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch)
{
- // We have three independent flag bits in the functions
+ // We have two independent flag bits in the functions
// therefore we will build a binary digit and select the
- // function as a three digit binary
- // Digit order SOFT_AES, NO_PREFETCH, MULX
-
- static const cn_hash_fun_dbl func_table[8] = {
- cryptonight_double_hash<0x80000, MEMORY, false, false, false>,
- cryptonight_double_hash<0x80000, MEMORY, false, false, true>,
- cryptonight_double_hash<0x80000, MEMORY, false, true, false>,
- cryptonight_double_hash<0x80000, MEMORY, false, true, true>,
- cryptonight_double_hash<0x80000, MEMORY, true, false, false>,
- cryptonight_double_hash<0x80000, MEMORY, true, false, true>,
- cryptonight_double_hash<0x80000, MEMORY, true, true, false>,
- cryptonight_double_hash<0x80000, MEMORY, true, true, true>
+ // function as a two digit binary
+ // Digit order SOFT_AES, NO_PREFETCH
+
+ static const cn_hash_fun_dbl func_table[4] = {
+ cryptonight_double_hash<0x80000, MEMORY, false, false>,
+ cryptonight_double_hash<0x80000, MEMORY, false, true>,
+ cryptonight_double_hash<0x80000, MEMORY, true, false>,
+ cryptonight_double_hash<0x80000, MEMORY, true, true>
};
- std::bitset<3> digit;
- digit.set(0, bMulx);
- digit.set(1, !bNoPrefetch);
- digit.set(2, !bHaveAes);
+ std::bitset<2> digit;
+ digit.set(0, !bNoPrefetch);
+ digit.set(1, !bHaveAes);
return func_table[digit.to_ulong()];
}
@@ -469,7 +459,7 @@ void minethd::double_work_main()
uint32_t iNonce;
job_result res;
- hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx());
+ hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch);
ctx0 = minethd_alloc_ctx();
ctx1 = minethd_alloc_ctx();
diff --git a/minethd.h b/minethd.h
index fbee325..6ba269e 100644
--- a/minethd.h
+++ b/minethd.h
@@ -114,8 +114,8 @@ private:
inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume)
{ return start | (resume * iThreadCount + iThreadNo) << 18; }
- static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx);
- static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx);
+ static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch);
+ static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch);
void work_main();
void double_work_main();
OpenPOWER on IntegriCloud