summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfireice-uk <fireice-uk@users.noreply.github.com>2017-12-04 15:48:58 +0000
committerGitHub <noreply@github.com>2017-12-04 15:48:58 +0000
commitc6f56b24ec8c32de594afc2f99ee571b1485426e (patch)
tree4b48cbace306b54a0455d98d744798d834a396a5
parenta78b62d19756865879c54eac00c1c3f4d983d3a1 (diff)
parentefc8dc447adc8fcbf2c1faf0de2dacf8d38889ac (diff)
downloadxmr-stak-c6f56b24ec8c32de594afc2f99ee571b1485426e.zip
xmr-stak-c6f56b24ec8c32de594afc2f99ee571b1485426e.tar.gz
Merge pull request #332 from psychocrypt/topic-tuneAMD
optimize aes round command queue
-rw-r--r--xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl30
1 files changed, 22 insertions, 8 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
index 996944b..81e1644 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
@@ -74,15 +74,29 @@ static const __constant uint AES0_C[256] =
#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))
-uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, const uint4 key)
+uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key)
{
- uint4 Y;
- Y.s0 = AES0[BYTE(X.s0, 0)] ^ AES1[BYTE(X.s1, 1)] ^ AES2[BYTE(X.s2, 2)] ^ AES3[BYTE(X.s3, 3)];
- Y.s1 = AES0[BYTE(X.s1, 0)] ^ AES1[BYTE(X.s2, 1)] ^ AES2[BYTE(X.s3, 2)] ^ AES3[BYTE(X.s0, 3)];
- Y.s2 = AES0[BYTE(X.s2, 0)] ^ AES1[BYTE(X.s3, 1)] ^ AES2[BYTE(X.s0, 2)] ^ AES3[BYTE(X.s1, 3)];
- Y.s3 = AES0[BYTE(X.s3, 0)] ^ AES1[BYTE(X.s0, 1)] ^ AES2[BYTE(X.s1, 2)] ^ AES3[BYTE(X.s2, 3)];
- Y ^= key;
- return(Y);
+ key.s0 ^= AES0[BYTE(X.s0, 0)];
+ key.s1 ^= AES0[BYTE(X.s1, 0)];
+ key.s2 ^= AES0[BYTE(X.s2, 0)];
+ key.s3 ^= AES0[BYTE(X.s3, 0)];
+
+ key.s0 ^= AES2[BYTE(X.s2, 2)];
+ key.s1 ^= AES2[BYTE(X.s3, 2)];
+ key.s2 ^= AES2[BYTE(X.s0, 2)];
+ key.s3 ^= AES2[BYTE(X.s1, 2)];
+
+ key.s0 ^= AES1[BYTE(X.s1, 1)];
+ key.s1 ^= AES1[BYTE(X.s2, 1)];
+ key.s2 ^= AES1[BYTE(X.s3, 1)];
+ key.s3 ^= AES1[BYTE(X.s0, 1)];
+
+ key.s0 ^= AES3[BYTE(X.s3, 3)];
+ key.s1 ^= AES3[BYTE(X.s0, 3)];
+ key.s2 ^= AES3[BYTE(X.s1, 3)];
+ key.s3 ^= AES3[BYTE(X.s2, 3)];
+
+ return key;
}
#endif
OpenPOWER on IntegriCloud