diff options
author | jmg <jmg@FreeBSD.org> | 2015-07-07 20:31:09 +0000 |
---|---|---|
committer | jmg <jmg@FreeBSD.org> | 2015-07-07 20:31:09 +0000 |
commit | 2b00c9ed0d0ba155d3185779824447025a77515c (patch) | |
tree | 53ef132a3bfb9397f08413dee8402c67fbe382dc /sys/crypto | |
parent | 61259e66f89da111f026b207e9aafc3708a078c0 (diff) | |
download | FreeBSD-src-2b00c9ed0d0ba155d3185779824447025a77515c.zip FreeBSD-src-2b00c9ed0d0ba155d3185779824447025a77515c.tar.gz |
Unroll the loop slightly. This improves performance enough to justify
the change, especially for CBC, where we can't pipeline. I don't happen
to have my measurements handy, though.
Sponsored by: Netflix, Inc.
Diffstat (limited to 'sys/crypto')
-rw-r--r-- | sys/crypto/aesni/aesencdec.h | 22 |
1 file changed, 17 insertions, 5 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h index 76e6403..80951a4 100644 --- a/sys/crypto/aesni/aesencdec.h +++ b/sys/crypto/aesni/aesencdec.h @@ -1,5 +1,6 @@ /*- * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,9 @@ * */ +#ifndef _AESENCDEC_H_ +#define _AESENCDEC_H_ + #include <crypto/aesni/aesni_os.h> #include <wmmintrin.h> @@ -105,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a, out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]); } +/* rounds is passed in as rounds - 1 */ static inline __m128i aesni_enc(int rounds, const __m128i *keysched, const __m128i from) { @@ -112,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from) int i; tmp = from ^ keysched[0]; - - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesenc_si128(tmp, keysched[i]); tmp = _mm_aesenc_si128(tmp, keysched[i + 1]); + } - return _mm_aesenclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesenc_si128(tmp, keysched[rounds]); + return _mm_aesenclast_si128(tmp, keysched[rounds + 1]); } static inline __m128i @@ -127,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from) tmp = from ^ keysched[0]; - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesdec_si128(tmp, keysched[i]); tmp = _mm_aesdec_si128(tmp, keysched[i + 1]); + } - return _mm_aesdeclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesdec_si128(tmp, keysched[rounds]); + return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]); } + +#endif /* _AESENCDEC_H_ */ |