summaryrefslogtreecommitdiffstats
path: root/sys/crypto
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-10-27 14:07:57 +0000
committerpjd <pjd@FreeBSD.org>2011-10-27 14:07:57 +0000
commitd01d35973aabd48fc2e219005c0b7d92d6411b41 (patch)
tree88080e79ebb463b56cff5cfdc5feb43229825c6f /sys/crypto
parentefcbafce648f89687f6a68e8fb7a0a0dc1135f6a (diff)
downloadFreeBSD-src-d01d35973aabd48fc2e219005c0b7d92d6411b41.zip
FreeBSD-src-d01d35973aabd48fc2e219005c0b7d92d6411b41.tar.gz
Improve AES-NI performance for AES-XTS:
- Operate on uint64_t types when doing XORing, etc. instead of uint8_t. - Don't bzero() temporary block for every AES block. Do it once for entire data block. - AES-NI is available only on little endian architectures. Simplify code that takes block number from IV. Benchmarks: Memory-backed md(4) device, software AES-XTS, 4kB sector: # dd if=/dev/md0.eli bs=1m 59.61MB/s Memory-backed md(4) device, old AES-NI AES-XTS, 4kB sector: # dd if=/dev/md0.eli bs=1m 97.29MB/s Memory-backed md(4) device, new AES-NI AES-XTS, 4kB sector: # dd if=/dev/md0.eli bs=1m 221.26MB/s 127% performance improvement between old and new code. Harddisk, raw speed: # dd if=/dev/ada0 bs=1m 137.63MB/s Harddisk, software AES-XTS, 4kB sector: # dd if=/dev/ada0.eli bs=1m 47.83MB/s (34% of raw disk speed) Harddisk, old AES-NI AES-XTS, 4kB sector: # dd if=/dev/ada0.eli bs=1m 68.33MB/s (49% of raw disk speed) Harddisk, new AES-NI AES-XTS, 4kB sector: # dd if=/dev/ada0.eli bs=1m 108.35MB/s (78% of raw disk speed) 58% performance improvement between old and new code. As a side-note, GELI with AES-NI using AES-CBC can achive native disk speed. MFC after: 3 days
Diffstat (limited to 'sys/crypto')
-rw-r--r--sys/crypto/aesni/aesni_wrap.c53
1 files changed, 27 insertions, 26 deletions
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 0f3eeb1..89c896d 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -87,33 +87,33 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
static void
-aesni_crypt_xts_block(int rounds, const void *key_schedule, uint8_t *tweak,
- const uint8_t *from, uint8_t *to, int do_encrypt)
+aesni_crypt_xts_block(int rounds, const void *key_schedule, uint64_t *tweak,
+ const uint64_t *from, uint64_t *to, uint64_t *block, int do_encrypt)
{
- uint8_t block[AES_XTS_BLOCKSIZE];
- u_int i, carry_in, carry_out;
+ int carry;
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
- block[i] = from[i] ^ tweak[i];
+ block[0] = from[0] ^ tweak[0];
+ block[1] = from[1] ^ tweak[1];
if (do_encrypt)
- aesni_enc(rounds - 1, key_schedule, block, to, NULL);
+ aesni_enc(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
else
- aesni_dec(rounds - 1, key_schedule, block, to, NULL);
+ aesni_dec(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
- to[i] ^= tweak[i];
+ to[0] ^= tweak[0];
+ to[1] ^= tweak[1];
/* Exponentiate tweak. */
- carry_in = 0;
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
- carry_out = tweak[i] & 0x80;
- tweak[i] = (tweak[i] << 1) | (carry_in ? 1 : 0);
- carry_in = carry_out;
+ carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
+ tweak[0] <<= 1;
+ if (tweak[1] & 0x8000000000000000ULL) {
+ uint8_t *twk = (uint8_t *)tweak;
+
+ twk[0] ^= AES_XTS_ALPHA;
}
- if (carry_in)
- tweak[0] ^= AES_XTS_ALPHA;
- bzero(block, sizeof(block));
+ tweak[1] <<= 1;
+ if (carry)
+ tweak[1] |= 1;
}
static void
@@ -121,32 +121,33 @@ aesni_crypt_xts(int rounds, const void *data_schedule,
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
+ uint64_t block[AES_XTS_BLOCKSIZE / 8];
uint8_t tweak[AES_XTS_BLOCKSIZE];
- uint64_t blocknum;
size_t i;
/*
* Prepare tweak as E_k2(IV). IV is specified as LE representation
* of a 64-bit block number which we allow to be passed in directly.
*/
- bcopy(iv, &blocknum, AES_XTS_IVSIZE);
- for (i = 0; i < AES_XTS_IVSIZE; i++) {
- tweak[i] = blocknum & 0xff;
- blocknum >>= 8;
- }
+#if BYTE_ORDER == LITTLE_ENDIAN
+ bcopy(iv, tweak, AES_XTS_IVSIZE);
/* Last 64 bits of IV are always zero. */
bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
+#else
+#error Only LITTLE_ENDIAN architectures are supported.
+#endif
aesni_enc(rounds - 1, tweak_schedule, tweak, tweak, NULL);
len /= AES_XTS_BLOCKSIZE;
for (i = 0; i < len; i++) {
- aesni_crypt_xts_block(rounds, data_schedule, tweak, from, to,
- do_encrypt);
+ aesni_crypt_xts_block(rounds, data_schedule, (uint64_t *)tweak,
+ (const uint64_t *)from, (uint64_t *)to, block, do_encrypt);
from += AES_XTS_BLOCKSIZE;
to += AES_XTS_BLOCKSIZE;
}
bzero(tweak, sizeof(tweak));
+ bzero(block, sizeof(block));
}
static void
OpenPOWER on IntegriCloud