summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author psychocrypt <psychocrypt@users.noreply.github.com> 2017-12-03 16:40:42 +0100
committer psychocrypt <psychocrypt@users.noreply.github.com> 2017-12-03 16:40:42 +0100
commit efc8dc447adc8fcbf2c1faf0de2dacf8d38889ac (patch)
tree 37d456be932d10c2ec27a1dc9b1fd089aec51e78
parent 2920e9a3227da307b04ee23ecc5c63ecee4a224c (diff)
download xmr-stak-efc8dc447adc8fcbf2c1faf0de2dacf8d38889ac.zip
xmr-stak-efc8dc447adc8fcbf2c1faf0de2dacf8d38889ac.tar.gz
optimize AES round command queue
unroll the AES round by hand to help the compiler optimize the command queue
-rw-r--r-- xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl 30
1 files changed, 22 insertions, 8 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
index 996944b..81e1644 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
@@ -74,15 +74,29 @@ static const __constant uint AES0_C[256] =
#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))
-uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, const uint4 key)
+uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key)
{
- uint4 Y;
- Y.s0 = AES0[BYTE(X.s0, 0)] ^ AES1[BYTE(X.s1, 1)] ^ AES2[BYTE(X.s2, 2)] ^ AES3[BYTE(X.s3, 3)];
- Y.s1 = AES0[BYTE(X.s1, 0)] ^ AES1[BYTE(X.s2, 1)] ^ AES2[BYTE(X.s3, 2)] ^ AES3[BYTE(X.s0, 3)];
- Y.s2 = AES0[BYTE(X.s2, 0)] ^ AES1[BYTE(X.s3, 1)] ^ AES2[BYTE(X.s0, 2)] ^ AES3[BYTE(X.s1, 3)];
- Y.s3 = AES0[BYTE(X.s3, 0)] ^ AES1[BYTE(X.s0, 1)] ^ AES2[BYTE(X.s1, 2)] ^ AES3[BYTE(X.s2, 3)];
- Y ^= key;
- return(Y);
+ key.s0 ^= AES0[BYTE(X.s0, 0)];
+ key.s1 ^= AES0[BYTE(X.s1, 0)];
+ key.s2 ^= AES0[BYTE(X.s2, 0)];
+ key.s3 ^= AES0[BYTE(X.s3, 0)];
+
+ key.s0 ^= AES2[BYTE(X.s2, 2)];
+ key.s1 ^= AES2[BYTE(X.s3, 2)];
+ key.s2 ^= AES2[BYTE(X.s0, 2)];
+ key.s3 ^= AES2[BYTE(X.s1, 2)];
+
+ key.s0 ^= AES1[BYTE(X.s1, 1)];
+ key.s1 ^= AES1[BYTE(X.s2, 1)];
+ key.s2 ^= AES1[BYTE(X.s3, 1)];
+ key.s3 ^= AES1[BYTE(X.s0, 1)];
+
+ key.s0 ^= AES3[BYTE(X.s3, 3)];
+ key.s1 ^= AES3[BYTE(X.s0, 3)];
+ key.s2 ^= AES3[BYTE(X.s1, 3)];
+ key.s3 ^= AES3[BYTE(X.s2, 3)];
+
+ return key;
}
#endif
OpenPOWER on IntegriCloud