From ec1e41cd8c78492ac5c657d0434d84bd9c491fb5 Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Wed, 26 Apr 2017 12:29:43 +0100 Subject: Remove mulx --- crypto/cryptonight_aesni.h | 25 ++++------------ jconf.cpp | 8 ----- jconf.h | 2 -- minethd.cpp | 74 ++++++++++++++++++++-------------------------- minethd.h | 4 +-- 5 files changed, 39 insertions(+), 74 deletions(-) diff --git a/crypto/cryptonight_aesni.h b/crypto/cryptonight_aesni.h index daf4037..6d990d3 100644 --- a/crypto/cryptonight_aesni.h +++ b/crypto/cryptonight_aesni.h @@ -27,12 +27,6 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) *hi = r >> 64; return (uint64_t)r; } - -__attribute__((target ("bmi2"))) static inline uint64_t _mulx_u64(uint64_t a, uint64_t b, uint64_t* hi) -{ - return _mulx_u64((unsigned long long)a, (unsigned long long)b, (unsigned long long*)hi); -} - #define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) #else #include @@ -284,7 +278,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } -template +template void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0) { keccak((const uint8_t *)input, len, ctx0->hash_state, 200); @@ -323,10 +317,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; - if(MULX) - lo = _mulx_u64(idx0, cl, &hi); - else - lo = _umul128(idx0, cl, &hi); + lo = _umul128(idx0, cl, &hi); al0 += hi; ah0 += lo; @@ -352,7 +343,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c // This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon // to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output // We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons) -template +template void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1) { keccak((const uint8_t *)input, len, ctx0->hash_state, 200); @@ -410,10 +401,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto uint64_t hi, lo; cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); - if(MULX) - lo = _mulx_u64(idx0, _mm_cvtsi128_si64(cx), &hi); - else - lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi); + lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi); ax0 = _mm_add_epi64(ax0, _mm_set_epi64x(lo, hi)); _mm_store_si128((__m128i*)&l0[idx0 & 0x1FFFF0], ax0); @@ -425,10 +413,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]); - if(MULX) - lo = _mulx_u64(idx1, _mm_cvtsi128_si64(cx), &hi); - else - lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi); + lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi); ax1 = _mm_add_epi64(ax1, _mm_set_epi64x(lo, hi)); _mm_store_si128((__m128i*)&l1[idx1 & 0x1FFFF0], ax1); diff --git a/jconf.cpp b/jconf.cpp index 82750cc..8ed1947 100644 --- a/jconf.cpp +++ b/jconf.cpp @@ -277,7 +277,6 @@ bool jconf::check_cpu_features() { constexpr int AESNI_BIT = 1 << 25; constexpr int SSE2_BIT = 1 << 26; - constexpr int BMI2_BIT = 1 << 8; int32_t cpu_info[4]; bool bHaveSse2; @@ -286,10 +285,6 @@ bool jconf::check_cpu_features() bHaveAes = (cpu_info[2] & AESNI_BIT) != 0; bHaveSse2 = (cpu_info[3] & SSE2_BIT) != 0; - cpuid(7, 0, cpu_info); - - bHaveBmi2 = (cpu_info[1] & BMI2_BIT) != 0; - return bHaveSse2; } @@ -462,9 +457,6 @@ bool jconf::parse_config(const char* sFilename) { if(!bHaveAes) printer::inst()->print_msg(L0, "Your CPU doesn't support hardware AES. Don't expect high hashrates."); - - if(bHaveBmi2) - printer::inst()->print_msg(L0, "CPU supports BMI2 instructions. Faster multiplication enabled."); } return true; diff --git a/jconf.h b/jconf.h index 08e2124..f932728 100644 --- a/jconf.h +++ b/jconf.h @@ -57,7 +57,6 @@ public: bool PreferIpv4(); inline bool HaveHardwareAes() { return bHaveAes; } - inline bool HaveMulx() { return bHaveBmi2; } static void cpuid(uint32_t eax, int32_t ecx, int32_t val[4]); @@ -70,5 +69,4 @@ private: opaque_private* prv; bool bHaveAes; - bool bHaveBmi2; }; diff --git a/minethd.cpp b/minethd.cpp index 55bf9e7..199779e 100644 --- a/minethd.cpp +++ b/minethd.cpp @@ -251,20 +251,20 @@ bool minethd::self_test() cn_hash_fun hashf; cn_hash_fun_dbl hashdf; - hashf = func_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx()); + hashf = func_selector(jconf::inst()->HaveHardwareAes(), false); hashf("This is a test", 14, out, ctx0); bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashf = func_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx()); + hashf = func_selector(jconf::inst()->HaveHardwareAes(), true); hashf("This is a test", 14, out, ctx0); bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx()); + hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false); hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1); bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx()); + hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true); hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1); bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; @@ -338,28 +338,23 @@ void minethd::consume_work() iConsumeCnt++; } -minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx) +minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch) { - // We have three independent flag bits in the functions + // We have two independent flag bits in the functions // therefore we will build a binary digit and select the - // function as a three digit binary - // Digit order SOFT_AES, NO_PREFETCH, MULX - - static const cn_hash_fun func_table[8] = { - cryptonight_hash<0x80000, MEMORY, false, false, false>, - cryptonight_hash<0x80000, MEMORY, false, false, true>, - cryptonight_hash<0x80000, MEMORY, false, true, false>, - cryptonight_hash<0x80000, MEMORY, false, true, true>, - cryptonight_hash<0x80000, MEMORY, true, false, false>, - cryptonight_hash<0x80000, MEMORY, true, false, true>, - cryptonight_hash<0x80000, MEMORY, true, true, false>, - cryptonight_hash<0x80000, MEMORY, true, true, true> + // function as a two digit binary + // Digit order SOFT_AES, NO_PREFETCH + + static const cn_hash_fun func_table[4] = { + cryptonight_hash<0x80000, MEMORY, false, false>, + cryptonight_hash<0x80000, MEMORY, false, true>, + cryptonight_hash<0x80000, MEMORY, true, false>, + cryptonight_hash<0x80000, MEMORY, true, true> }; - std::bitset<3> digit; - digit.set(0, bMulx); - digit.set(1, !bNoPrefetch); - digit.set(2, !bHaveAes); + std::bitset<2> digit; + digit.set(0, !bNoPrefetch); + digit.set(1, !bHaveAes); return func_table[digit.to_ulong()]; } @@ -373,7 +368,7 @@ void minethd::work_main() uint32_t* piNonce; job_result result; - hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx()); + hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch); ctx = minethd_alloc_ctx(); piHashVal = (uint64_t*)(result.bResult + 24); @@ -430,28 +425,23 @@ void minethd::work_main() cryptonight_free_ctx(ctx); } -minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx) +minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch) { - // We have three independent flag bits in the functions + // We have two independent flag bits in the functions // therefore we will build a binary digit and select the - // function as a three digit binary - // Digit order SOFT_AES, NO_PREFETCH, MULX - - static const cn_hash_fun_dbl func_table[8] = { - cryptonight_double_hash<0x80000, MEMORY, false, false, false>, - cryptonight_double_hash<0x80000, MEMORY, false, false, true>, - cryptonight_double_hash<0x80000, MEMORY, false, true, false>, - cryptonight_double_hash<0x80000, MEMORY, false, true, true>, - cryptonight_double_hash<0x80000, MEMORY, true, false, false>, - cryptonight_double_hash<0x80000, MEMORY, true, false, true>, - cryptonight_double_hash<0x80000, MEMORY, true, true, false>, - cryptonight_double_hash<0x80000, MEMORY, true, true, true> + // function as a two digit binary + // Digit order SOFT_AES, NO_PREFETCH + + static const cn_hash_fun_dbl func_table[4] = { + cryptonight_double_hash<0x80000, MEMORY, false, false>, + cryptonight_double_hash<0x80000, MEMORY, false, true>, + cryptonight_double_hash<0x80000, MEMORY, true, false>, + cryptonight_double_hash<0x80000, MEMORY, true, true> }; - std::bitset<3> digit; - digit.set(0, bMulx); - digit.set(1, !bNoPrefetch); - digit.set(2, !bHaveAes); + std::bitset<2> digit; + digit.set(0, !bNoPrefetch); + digit.set(1, !bHaveAes); return func_table[digit.to_ulong()]; } @@ -469,7 +459,7 @@ void minethd::double_work_main() uint32_t iNonce; job_result res; - hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx()); + hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch); ctx0 = minethd_alloc_ctx(); ctx1 = minethd_alloc_ctx(); diff --git a/minethd.h b/minethd.h index fbee325..6ba269e 100644 --- a/minethd.h +++ b/minethd.h @@ -114,8 +114,8 @@ private: inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume) { return start | (resume * iThreadCount + iThreadNo) << 18; } - static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx); - static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx); + static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch); + static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch); void work_main(); void double_work_main(); -- cgit v1.1