author | Tony Butler <spudz76@gmail.com> | 2018-05-03 16:00:58 +0100
---|---|---
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2018-06-04 21:07:11 +0000
commit | 7c587421f1050dac43d612b03f73c405928031b3 (patch) |
tree | b19a8938da8922ff02c047a5b35aebe6aecea428 | /xmrstak/backend/nvidia
parent | 84471e3d48b0423410384d41c2ce467b19f2b73a (diff) |
download | xmr-stak-7c587421f1050dac43d612b03f73c405928031b3.zip | xmr-stak-7c587421f1050dac43d612b03f73c405928031b3.tar.gz
Spell check
Diffstat (limited to 'xmrstak/backend/nvidia')
-rw-r--r-- | xmrstak/backend/nvidia/autoAdjust.hpp | 4
-rw-r--r-- | xmrstak/backend/nvidia/config.tpl | 2
-rw-r--r-- | xmrstak/backend/nvidia/minethd.cpp | 10
-rw-r--r-- | xmrstak/backend/nvidia/minethd.hpp | 2
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp | 2
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp | 20
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 19
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 14
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp | 4
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp | 20
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp | 12
-rw-r--r-- | xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp | 20
12 files changed, 64 insertions, 65 deletions
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index d8bb621..1246809 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -21,7 +21,7 @@ namespace nvidia
 {
 
 class autoAdjust
-{
+{
 public:
 
 	autoAdjust()
@@ -42,7 +42,7 @@ public:
 		// evaluate config parameter for if auto adjustment is needed
 		for(int i = 0; i < deviceCount; i++)
 		{
-
+
 			nvid_ctx ctx;
 			ctx.device_id = i;
diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl
index f489956..2aa68dc 100644
--- a/xmrstak/backend/nvidia/config.tpl
+++ b/xmrstak/backend/nvidia/config.tpl
@@ -22,7 +22,7 @@ R"===(
 * A filled out configuration should look like this:
 * "gpu_threads_conf" :
 * [
-*     { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0,
+*     { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0,
 *       "affine_to_cpu" : false, "sync_mode" : 3,
 *     },
 * ],
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 8e66f34..d14fbd4 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -181,7 +181,7 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 		}
 		else
 			printer::inst()->print_msg(L1, "Starting NVIDIA GPU thread %d, no affinity.", i);
-
+
 		minethd* thd = new minethd(pWork, i + threadOffset, cfg);
 		pvThreads->push_back(thd);
@@ -223,7 +223,7 @@ void minethd::work_main()
 
 	if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
 	{
-		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
+		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exiting.\n", (int)iThreadNo);
 		std::exit(0);
 	}
@@ -237,11 +237,11 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-
+
 	// start with root algorithm and switch later if fork version is reached
 	auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
-
+
 	uint32_t iNonce;
 
 	globalStates::inst().iConsumeCnt++;
@@ -299,7 +299,7 @@ void minethd::work_main()
 		{
 			globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, h_per_round * 16);
 		}
-
+
 		uint32_t foundNonce[10];
 		uint32_t foundCount;
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index 89c2944..ad541bf 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -33,7 +33,7 @@ private:
 	minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
 
 	void start_mining();
-
+
 	void work_main();
 	void consume_work();
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index c1e31b9..d588641 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -14,7 +14,7 @@ typedef struct {
 	int device_blocks;
 	int device_threads;
 	int device_bfactor;
-	int device_bsleep;
+	int device_bsleep;
 	int syncMode;
 
 	uint32_t *d_input;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
index 340174c..611fe1c 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
@@ -95,7 +95,7 @@ __device__ void cn_blake_update(blake_state * S, const uint8_t * data, uint64_
 	uint32_t left = S->buflen >> 3;
 	uint32_t fill = 64 - left;
 
-	if (left && (((datalen >> 3) & 0x3F) >= fill))
+	if (left && (((datalen >> 3) & 0x3F) >= fill))
 	{
 		memcpy((void *) (S->buf + left), (void *) data, fill);
 		S->t[0] += 512;
@@ -106,7 +106,7 @@ __device__ void cn_blake_update(blake_state * S, const uint8_t * data, uint64_
 		left = 0;
 	}
 
-	while (datalen >= 512)
+	while (datalen >= 512)
 	{
 		S->t[0] += 512;
 		if (S->t[0] == 0) S->t[1]++;
@@ -115,12 +115,12 @@ __device__ void cn_blake_update(blake_state * S, const uint8_t * data, uint64_
 		datalen -= 512;
 	}
 
-	if (datalen > 0)
+	if (datalen > 0)
 	{
 		memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
 		S->buflen = (left << 3) + datalen;
 	}
-	else
+	else
 	{
 		S->buflen = 0;
 	}
@@ -128,7 +128,7 @@ __device__ void cn_blake_update(blake_state * S, const uint8_t * data, uint64_
 
 __device__ void cn_blake_final(blake_state * S, uint8_t * digest)
 {
-	const uint8_t padding[] =
+	const uint8_t padding[] =
 	{
 		0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -141,20 +141,20 @@ __device__ void cn_blake_final(blake_state * S, uint8_t * digest)
 	U32TO8(msglen + 0, hi);
 	U32TO8(msglen + 4, lo);
 
-	if (S->buflen == 440)
+	if (S->buflen == 440)
 	{
 		S->t[0] -= 8;
 		cn_blake_update(S, &pa, 8);
-	}
-	else
+	}
+	else
 	{
-		if (S->buflen < 440)
+		if (S->buflen < 440)
 		{
 			if (S->buflen == 0) S->nullt = 1;
 			S->t[0] -= 440 - S->buflen;
 			cn_blake_update(S, padding, 440 - S->buflen);
 		}
-		else
+		else
 		{
 			S->t[0] -= 512 - S->buflen;
 			cn_blake_update(S, padding, 512 - S->buflen);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 4eacfb6..3d8af48 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -170,7 +170,7 @@ __forceinline__ __device__ void unusedVar( const T& )
 * - for <sm_30 shared memory is needed
 *
 * group_n - must be a power of 2!
-*
+*
 * @param ptr pointer to shared memory, size must be `threadIdx.x * sizeof(uint32_t)`
 *            value can be NULL for compute architecture >=sm_30
 * @param sub thread number within the group, range [0:group_n]
@@ -293,7 +293,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 		}
 		else
 			storeGlobal32( long_state + j, z );
-
+
 		//MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & MASK]);
 		j = ( ( *t1 & MASK ) >> 2 ) + sub;
@@ -312,23 +312,22 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 		res = *( (uint64_t *) t2 ) >> ( sub & 1 ? 32 : 0 );
 
-
 		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite)
 		{
 			const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res;
 			uint32_t long_state_update = sub2 ? tweaked_res : res;
-
+
 			if (ALGO == cryptonight_ipbc)
 			{
 				uint32_t value = shuffle<4>(sPtr,sub, long_state_update, sub & 1) ^ long_state_update;
 				long_state_update = sub >= 2 ? value : long_state_update;
 			}
-
+
 			storeGlobal32( long_state + j, long_state_update );
 		}
 		else
 			storeGlobal32( long_state + j, res );
-
+
 		a = ( sub & 1 ? yy[1] : yy[0] ) ^ res;
 		idx0 = shuffle<4>(sPtr,sub, a, 0);
 		if(ALGO == cryptonight_heavy)
@@ -379,7 +378,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	MEMCPY8( text, d_ctx_state + thread * 50 + sub + 16, 2 );
 
 	__syncthreads( );
-
+
 #if( __CUDA_ARCH__ < 300 )
 	extern __shared__ uint32_t shuffleMem[];
 	volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFF8));
@@ -394,7 +393,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 			text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
 
 		cn_aes_pseudo_round_mut( sharedMemory, text, key );
-
+
 		if(ALGO == cryptonight_heavy)
 		{
 #pragma unroll
@@ -431,7 +430,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 	{
 		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,MEMORY><<< grid, block8 >>>(
 			ctx->device_blocks*ctx->device_threads,
 			bfactorOneThree, i,
-			ctx->d_long_state,
+			ctx->d_long_state,
 			(ALGO == cryptonight_heavy ? ctx->d_ctx_state2 : ctx->d_ctx_state),
 			ctx->d_ctx_key1 ));
@@ -474,7 +473,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 
 	for ( int i = 0; i < roundsPhase3; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<<
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<<
			grid,
			block8,
			block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index f192f01..304997e 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -184,7 +184,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3
 	uint32_t * __restrict__ ctx_state = d_ctx_state + thread * 50;
 	uint64_t hash[4];
 	uint32_t state[50];
-
+
 #pragma unroll
 	for ( i = 0; i < 50; i++ )
 		state[i] = ctx_state[i];
@@ -296,7 +296,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 	}
 	else
 		ctx->d_ctx_state2 = ctx->d_ctx_state;
-
+
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize));
@@ -472,7 +472,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
 		if(it == arch.end())
 		{
-			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for CUDA architecture %d.\n", ctx->device_id, gpuArch);
 			return 5;
 		}
 	}
@@ -490,7 +490,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 				minSupportedArch = arch[i];
 		if(minSupportedArch < 30 || gpuArch < minSupportedArch)
 		{
-			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for CUDA architecture %d.\n", ctx->device_id, gpuArch);
 			return 5;
 		}
 	}
@@ -517,7 +517,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		 */
 		ctx->device_threads = 64;
 		constexpr size_t byteToMiB = 1024u * 1024u;
-
+
 		// no limit by default 1TiB
 		size_t maxMemUsage = byteToMiB * byteToMiB;
 		if(props.major == 6)
@@ -575,7 +575,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		CUDA_CHECK(ctx->device_id, cudaFree(tmp));
 		// delete created context on the gpu
 		CUDA_CHECK(ctx->device_id, cudaDeviceReset());
-
+
 		ctx->total_device_memory = totalMemory;
 		ctx->free_device_memory = freeMemory;
@@ -614,7 +614,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		size_t perThread = hashMemSize + 16192u + 680u;
 		if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo())
 			perThread += 50 * 4; // state double buffer
-
+
 		size_t max_intensity = limitedMemory / perThread;
 		ctx->device_threads = max_intensity / ctx->device_blocks;
 		// use only odd number of threads
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
index 055a8bd..4d369f8 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
@@ -36,8 +36,8 @@ __forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
-	}
-	else
+	}
+	else
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
index a37934c..d5a98b7 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp
@@ -167,7 +167,7 @@ __device__ void cn_groestl_F512(uint32_t * __restrict__ h, const uint32_t * __re
 	uint32_t y[2*GROESTL_COLS512];
 	uint32_t z[2*GROESTL_COLS512];
 
-	for (i = 0; i < 2*GROESTL_COLS512; i++)
+	for (i = 0; i < 2*GROESTL_COLS512; i++)
 	{
 		z[i] = m[i];
 		Ptmp[i] = h[i]^m[i];
@@ -227,23 +227,23 @@ __device__ void cn_groestl_outputtransformation(groestlHashState *ctx)
 
 __device__ void cn_groestl_transform(groestlHashState * __restrict__ ctx,
 		const uint8_t * __restrict__ input, int msglen)
 {
-	for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512)
+	for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512)
 	{
 		cn_groestl_F512(ctx->chaining,(uint32_t*)input);
 		ctx->block_counter1++;
-		if (ctx->block_counter1 == 0)
+		if (ctx->block_counter1 == 0)
 			ctx->block_counter2++;
 	}
 }
 
-__device__ void cn_groestl_final(groestlHashState* __restrict__ ctx,
+__device__ void cn_groestl_final(groestlHashState* __restrict__ ctx,
 		BitSequence* __restrict__ output)
 {
 	int i, j = 0, hashbytelen = GROESTL_HASH_BIT_LEN/8;
 	uint8_t *s = (BitSequence*)ctx->chaining;
 
-	if (ctx->bits_in_last_byte)
+	if (ctx->bits_in_last_byte)
 	{
 		ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<ctx->bits_in_last_byte)-1)<<(8-ctx->bits_in_last_byte);
 		ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-ctx->bits_in_last_byte);
@@ -254,9 +254,9 @@ __device__ void cn_groestl_final(groestlHashState* __restrict__ ctx,
 		ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
 	}
 
-	if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN)
+	if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN)
 	{
-		while (ctx->buf_ptr < GROESTL_SIZE512)
+		while (ctx->buf_ptr < GROESTL_SIZE512)
 			ctx->buffer[(int)ctx->buf_ptr++] = 0;
 
 		cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512);
@@ -300,14 +300,14 @@ __device__ void cn_groestl_update(groestlHashState* __restrict__ ctx,
 	int msglen = (int)(databitlen/8);
 	int rem = (int)(databitlen%8);
 
-	if (ctx->buf_ptr)
+	if (ctx->buf_ptr)
 	{
 		while (ctx->buf_ptr < GROESTL_SIZE512 && index < msglen)
 			ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
 
-		if (ctx->buf_ptr < GROESTL_SIZE512)
+		if (ctx->buf_ptr < GROESTL_SIZE512)
 		{
-			if (rem)
+			if (rem)
 			{
 				ctx->bits_in_last_byte = rem;
 				ctx->buffer[(int)ctx->buf_ptr++] = input[index];
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
index 679046e..284039f 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp
@@ -111,7 +111,7 @@ __device__ void cn_jh_E8(jhHashState *state)
 {
 	uint64_t i,roundnumber,temp0,temp1;
 
-	for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7)
+	for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7)
 	{
 		for (i = 0; i < 2; i++)
 		{
@@ -155,13 +155,13 @@ __device__ void cn_jh_E8(jhHashState *state)
 			JH_SWAP32(state->x[1][i]); JH_SWAP32(state->x[3][i]); JH_SWAP32(state->x[5][i]); JH_SWAP32(state->x[7][i]);
 		}
 
-		for (i = 0; i < 2; i++)
+		for (i = 0; i < 2; i++)
 		{
 			JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+6])[i],((uint64_t *)d_E8_rc[roundnumber+6])[i+2] );
 			JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
 		}
 
-		for (i = 1; i < 8; i = i+2)
+		for (i = 1; i < 8; i = i+2)
 		{
 			temp0 = state->x[i][0];
 			state->x[i][0] = state->x[i][1];
@@ -190,7 +190,7 @@ __device__ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence
 	state->databitlen += databitlen;
 	index = 0;
 
-	if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) )
+	if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) )
 	{
 		if ( (databitlen & 7) == 0 )
 			memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3));
@@ -215,7 +215,7 @@ __device__ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence
 		cn_jh_F8(state);
 	}
 
-	if ( databitlen > 0)
+	if ( databitlen > 0)
 	{
 		if ((databitlen & 7) == 0)
 			memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
@@ -247,7 +247,7 @@ __device__ void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __re
 		state->buffer[56] = (state->databitlen >> 56) & 0xff;
 		cn_jh_F8(state);
 	}
-	else
+	else
 	{
 		/*set the rest of the bytes in the buffer to 0*/
 		if ( (state->datasize_in_buffer & 7) == 0)
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
index 041a593..fc45db1 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp
@@ -221,7 +221,7 @@ __device__ void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, co
 		ctx->X[7] = X7 ^ w[7];
 
 		ts[1] &= ~SKEIN_T1_FLAG_FIRST;
-	}
+	}
 	while (--blkCnt);
 
 	ctx->h.T[0] = ts[0];
@@ -239,7 +239,7 @@ __device__ void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __
 
 	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
 
-	if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
+	if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
 	{
 		memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
 		//p8 = &ctx->b[ctx->h.bCnt];
@@ -258,7 +258,7 @@ __device__ void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __
 
 	memcpy(X,ctx->X,sizeof(X));
 
-	for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+	for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
 	{
 		((uint64_t *)ctx->b)[0]= (uint64_t)i;
 		Skein_Start_New_Type(ctx,OUT_FINAL);
@@ -275,15 +275,15 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 {
 	size_t n;
 
-	if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+	if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
 	{
-		if (ctx->h.bCnt)
+		if (ctx->h.bCnt)
 		{
 			n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
-			if (n)
+			if (n)
 			{
 				memcpy(&ctx->b[ctx->h.bCnt],msg,n);
 				msgByteCnt -= n;
@@ -295,7 +295,7 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 			ctx->h.bCnt = 0;
 		}
 
-		if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
+		if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
 		{
 			n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;
 			cn_skein512_processblock(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
@@ -304,7 +304,7 @@ __device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const ui
 		}
 	}
 
-	if (msgByteCnt)
+	if (msgByteCnt)
 	{
 		memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
 		ctx->h.bCnt += msgByteCnt;
@@ -313,11 +313,11 @@ __device__ void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
 {
-	if ((databitlen & 7) == 0)
+	if ((databitlen & 7) == 0)
 	{
 		cn_skein512_update(&state->u.ctx_512,data,databitlen >> 3);
 	}
-	else
+	else
 	{
 		size_t bCnt = (databitlen >> 3) + 1;