summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Luiz Otavio O Souza <luiz@netgate.com> 2015-09-15 16:15:23 -0500
committer: Luiz Otavio O Souza <luiz@netgate.com> 2015-10-20 12:07:13 -0500
commit: 47573b208d9723bf2586ef1b2448a5bd064c91b1 (patch)
tree: 48c329b3feba4094e541b28cab23dce41779383f
parent: 4a5da8d970cd2aa5659b6ef40220a69671465f74 (diff)
download: FreeBSD-src-47573b208d9723bf2586ef1b2448a5bd064c91b1.zip
FreeBSD-src-47573b208d9723bf2586ef1b2448a5bd064c91b1.tar.gz
MFC r285254:
Unroll the loop slightly. This improves performance enough to justify the change, especially for CBC, where we can't pipeline. I don't happen to have my measurements handy, though. Sponsored by: Netflix, Inc. TAG: IPSEC-HEAD Issue: #4841
-rw-r--r-- sys/crypto/aesni/aesencdec.h 22
1 files changed, 17 insertions, 5 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h
index 76e6403..80951a4 100644
--- a/sys/crypto/aesni/aesencdec.h
+++ b/sys/crypto/aesni/aesencdec.h
@@ -1,5 +1,6 @@
/*-
* Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
+ * Copyright 2015 Netflix, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,6 +28,9 @@
*
*/
+#ifndef _AESENCDEC_H_
+#define _AESENCDEC_H_
+
#include <crypto/aesni/aesni_os.h>
#include <wmmintrin.h>
@@ -105,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]);
}
+/* rounds is the AES round count minus one (callers pass rounds - 1); the two-at-a-time loop below requires it to be odd */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
@@ -112,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
int i;
tmp = from ^ keysched[0];
-
- for (i = 0; i < rounds; i++)
+ for (i = 1; i < rounds; i += 2) {
+ tmp = _mm_aesenc_si128(tmp, keysched[i]);
tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);
+ }
- return _mm_aesenclast_si128(tmp, keysched[i + 1]);
+ tmp = _mm_aesenc_si128(tmp, keysched[rounds]);
+ return _mm_aesenclast_si128(tmp, keysched[rounds + 1]);
}
static inline __m128i
@@ -127,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
tmp = from ^ keysched[0];
- for (i = 0; i < rounds; i++)
+ for (i = 1; i < rounds; i += 2) {
+ tmp = _mm_aesdec_si128(tmp, keysched[i]);
tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);
+ }
- return _mm_aesdeclast_si128(tmp, keysched[i + 1]);
+ tmp = _mm_aesdec_si128(tmp, keysched[rounds]);
+ return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]);
}
+
+#endif /* _AESENCDEC_H_ */
OpenPOWER on IntegriCloud