author | Luiz Otavio O Souza <luiz@netgate.com> | 2015-09-15 12:29:06 -0500
---|---|---
committer | Luiz Otavio O Souza <luiz@netgate.com> | 2015-10-20 11:31:33 -0500
commit | 8debb5ec1f7f108a317cc2c7624198ba2eb03469 (patch) |
tree | 989208028a8311efb7567490c699e434d211f110 /sys/crypto |
parent | 7353719223942a91cc891d735e5c7a494f7a2244 (diff) |
Revert AESNI patches.
Revert "Importing pfSense patch aesgcm.hwaccl.diff"
This reverts commit 86163f54d3288d43997b0766d4c2538ed7f70b17.
TAG: IPSEC-HEAD
Issue: #4841
Diffstat (limited to 'sys/crypto')
-rw-r--r-- | sys/crypto/aesni/aesni.c | 401
-rw-r--r-- | sys/crypto/aesni/aesni.h | 15
-rw-r--r-- | sys/crypto/aesni/aesni_ghash.c | 523
-rw-r--r-- | sys/crypto/aesni/aesni_wrap.c | 37
4 files changed, 122 insertions, 854 deletions
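
The main structural change in the diff below is the session bookkeeping the revert restores: instead of a flat, resizable array of sessions, the driver keeps them on a TAILQ with free entries at the head and in-use entries at the tail, so creating a session only has to inspect the first element. A minimal user-space sketch of that pattern (hypothetical names `session_new`/`session_free`, simplified from the driver code; not the driver itself):

```c
#include <sys/queue.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct session {
	uint32_t id;
	int used;
	TAILQ_ENTRY(session) next;
};

static TAILQ_HEAD(session_head, session) sessions =
    TAILQ_HEAD_INITIALIZER(sessions);
static uint32_t next_sid = 1;

/* Reuse a free session from the head of the list, else allocate one. */
static struct session *
session_new(void)
{
	struct session *ses;

	ses = TAILQ_FIRST(&sessions);
	if (ses == NULL || ses->used) {
		/* Head is busy (or list empty): every session is in use. */
		ses = calloc(1, sizeof(*ses));
		if (ses == NULL)
			return (NULL);
		ses->id = next_sid++;
	} else
		TAILQ_REMOVE(&sessions, ses, next);
	ses->used = 1;
	TAILQ_INSERT_TAIL(&sessions, ses, next);
	return (ses);
}

/* Mark a session free and move it to the head for quick reuse. */
static void
session_free(struct session *ses)
{
	TAILQ_REMOVE(&sessions, ses, next);
	ses->used = 0;
	TAILQ_INSERT_HEAD(&sessions, ses, next);
}

int
main(void)
{
	struct session *a, *b, *c;

	a = session_new();
	b = session_new();
	session_free(a);
	c = session_new();	/* reuses 'a' instead of allocating */
	printf("ids: a=%u b=%u c=%u\n", a->id, b->id, c->id);	/* 1 2 1 */
	return (0);
}
```
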
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
index e1bd5e8..7d7a740 100644
--- a/sys/crypto/aesni/aesni.c
+++ b/sys/crypto/aesni/aesni.c
@@ -39,15 +39,14 @@ __FBSDID("$FreeBSD$");
 #include <sys/rwlock.h>
 #include <sys/bus.h>
 #include <sys/uio.h>
-#include <sys/mbuf.h>
 #include <crypto/aesni/aesni.h>
 #include <cryptodev_if.h>
-#include <opencrypto/gmac.h>
 
 struct aesni_softc {
 	int32_t cid;
-	volatile uint32_t nsessions;
-	struct aesni_session *sessions;
+	uint32_t sid;
+	TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions;
+	struct rwlock lock;
 };
 
 static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri);
@@ -57,7 +56,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc,
 static int aesni_cipher_setup(struct aesni_session *ses,
     struct cryptoini *encini);
 static int aesni_cipher_process(struct aesni_session *ses,
-    struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp);
+    struct cryptodesc *enccrd, struct cryptop *crp);
 
 MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data");
 
@@ -80,12 +79,12 @@ aesni_probe(device_t dev)
 		return (EINVAL);
 	}
 
-	if ((cpu_feature & CPUID2_SSE41) == 0 && (cpu_feature2 & CPUID2_SSE41) == 0) {
-		device_printf(dev, "No SSE4.1 support.\n");
+	if ((cpu_feature & CPUID_SSE2) == 0) {
+		device_printf(dev, "No SSE2 support but AESNI!?!\n");
 		return (EINVAL);
 	}
 
-	device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM");
+	device_set_desc_copy(dev, "AES-CBC,AES-XTS");
 	return (0);
 }
 
@@ -95,6 +94,8 @@ aesni_attach(device_t dev)
 	struct aesni_softc *sc;
 
 	sc = device_get_softc(dev);
+	TAILQ_INIT(&sc->sessions);
+	sc->sid = 1;
 	sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE |
 	    CRYPTOCAP_F_SYNC);
 	if (sc->cid < 0) {
@@ -102,16 +103,9 @@ aesni_attach(device_t dev)
 		return (ENOMEM);
 	}
 
-	sc->nsessions = 32;
-	sc->sessions = malloc(sc->nsessions * sizeof(struct aesni_session),
-	    M_AESNI, M_WAITOK | M_ZERO);
-
+	rw_init(&sc->lock, "aesni_lock");
 	crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0);
 	crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0);
-	crypto_register(sc->cid, CRYPTO_AES_RFC4106_GCM_16, 0, 0);
-	crypto_register(sc->cid, CRYPTO_AES_128_GMAC, 0, 0);
-	crypto_register(sc->cid, CRYPTO_AES_192_GMAC, 0, 0);
-	crypto_register(sc->cid, CRYPTO_AES_256_GMAC, 0, 0);
 	return (0);
 }
 
@@ -120,24 +114,25 @@ aesni_detach(device_t dev)
 {
 	struct aesni_softc *sc;
 	struct aesni_session *ses;
-	int i;
 
 	sc = device_get_softc(dev);
-	for (i = 0; i < sc->nsessions; i++) {
-		ses = &sc->sessions[i];
+	rw_wlock(&sc->lock);
+	TAILQ_FOREACH(ses, &sc->sessions, next) {
 		if (ses->used) {
+			rw_wunlock(&sc->lock);
 			device_printf(dev,
 			    "Cannot detach, sessions still active.\n");
 			return (EBUSY);
 		}
 	}
-	crypto_unregister_all(sc->cid);
-	for (i = 0; i < sc->nsessions; i++) {
-		ses = &sc->sessions[i];
-		if (ses->fpu_ctx != NULL)
-			fpu_kern_free_ctx(ses->fpu_ctx);
+	while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) {
+		TAILQ_REMOVE(&sc->sessions, ses, next);
+		fpu_kern_free_ctx(ses->fpu_ctx);
+		free(ses, M_AESNI);
 	}
-	free(sc->sessions, M_AESNI);
+	rw_wunlock(&sc->lock);
+	rw_destroy(&sc->lock);
+	crypto_unregister_all(sc->cid);
 	return (0);
 }
 
@@ -147,12 +142,10 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
 	struct aesni_softc *sc;
 	struct aesni_session *ses;
 	struct cryptoini *encini;
-	int error, sessn;
+	int error;
 
-	if (sidp == NULL || cri == NULL) {
-		printf("no sidp or cri");
+	if (sidp == NULL || cri == NULL)
 		return (EINVAL);
-	}
 
 	sc = device_get_softc(dev);
 	ses = NULL;
@@ -160,76 +153,55 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
 	for (; cri != NULL; cri = cri->cri_next) {
 		switch (cri->cri_alg) {
 		case CRYPTO_AES_CBC:
-			if (encini != NULL) {
-				printf("encini already set");
-				return (EINVAL);
-			}
-			encini = cri;
-			break;
 		case CRYPTO_AES_XTS:
-		case CRYPTO_AES_RFC4106_GCM_16:
-			if (encini != NULL) {
-				printf("encini already set");
+			if (encini != NULL)
 				return (EINVAL);
-			}
-			encini = cri;
-			break;
-		case CRYPTO_AES_128_GMAC:
-		case CRYPTO_AES_192_GMAC:
-		case CRYPTO_AES_256_GMAC:
-			/*
-			 * nothing to do here, maybe in the future cache some
-			 * values for GHASH
-			 */
+			encini = cri;
 			break;
 		default:
-			printf("unhandled algorithm");
 			return (EINVAL);
 		}
 	}
-	if (encini == NULL) {
-		printf("no cipher");
+	if (encini == NULL)
 		return (EINVAL);
-	}
 
-	for (sessn = 1; sessn < sc->nsessions; sessn++) {
-		if (!sc->sessions[sessn].used) {
-			ses = &sc->sessions[sessn];
-			break;
-		}
-	}
-	if (ses == NULL) {
-		ses = malloc(sizeof(*ses) * sc->nsessions * 2, M_AESNI, M_NOWAIT | M_ZERO);
+	rw_wlock(&sc->lock);
+	/*
+	 * Free sessions goes first, so if first session is used, we need to
+	 * allocate one.
	 */
+	ses = TAILQ_FIRST(&sc->sessions);
+	if (ses == NULL || ses->used) {
+		ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO);
 		if (ses == NULL) {
-			sc->sessions = ses;
+			rw_wunlock(&sc->lock);
 			return (ENOMEM);
 		}
-		bcopy((void *)sc->sessions, (void *)ses, sc->nsessions * sizeof(*ses));
-		atomic_set_ptr((u_long *)sc->sessions, (u_long)ses);
-		bzero((void *)ses, sc->nsessions * sizeof(*ses));
-		ses = &sc->sessions[sc->nsessions];
-		ses->id = sc->nsessions;
-		atomic_add_int(&sc->nsessions, 1);
-	} else if (ses->id == 0)
-		ses->id = sessn;
-
-	if (ses->fpu_ctx == NULL) {
 		ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL |
 		    FPU_KERN_NOWAIT);
-		if (ses->fpu_ctx == NULL)
+		if (ses->fpu_ctx == NULL) {
+			free(ses, M_AESNI);
+			rw_wunlock(&sc->lock);
 			return (ENOMEM);
+		}
+		ses->id = sc->sid++;
+	} else {
+		TAILQ_REMOVE(&sc->sessions, ses, next);
 	}
+	ses->used = 1;
+	TAILQ_INSERT_TAIL(&sc->sessions, ses, next);
+	rw_wunlock(&sc->lock);
 
 	ses->algo = encini->cri_alg;
 	error = aesni_cipher_setup(ses, encini);
 	if (error != 0) {
-		printf("setup failed");
+		rw_wlock(&sc->lock);
 		aesni_freesession_locked(sc, ses);
+		rw_wunlock(&sc->lock);
 		return (error);
 	}
 
-	ses->used = 1;
-	*sidp = ses->id;
+	*sidp = ses->id;
 	return (0);
 }
 
@@ -240,10 +212,12 @@ aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses)
 	uint32_t sid;
 
 	sid = ses->id;
+	TAILQ_REMOVE(&sc->sessions, ses, next);
 	ctx = ses->fpu_ctx;
 	bzero(ses, sizeof(*ses));
 	ses->id = sid;
 	ses->fpu_ctx = ctx;
+	TAILQ_INSERT_HEAD(&sc->sessions, ses, next);
 }
 
 static int
@@ -255,14 +229,17 @@ aesni_freesession(device_t dev, uint64_t tid)
 
 	sc = device_get_softc(dev);
 	sid = ((uint32_t)tid) & 0xffffffff;
-	if (sid >= sc->nsessions)
-		return (EINVAL);
-
-	ses = &sc->sessions[sid];
-	if (ses == NULL)
+	rw_wlock(&sc->lock);
+	TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
+		if (ses->id == sid)
+			break;
+	}
+	if (ses == NULL) {
+		rw_wunlock(&sc->lock);
 		return (EINVAL);
-
+	}
 	aesni_freesession_locked(sc, ses);
+	rw_wunlock(&sc->lock);
 	return (0);
 }
 
@@ -271,25 +248,20 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
 {
 	struct aesni_softc *sc = device_get_softc(dev);
 	struct aesni_session *ses = NULL;
-	struct cryptodesc *crd, *enccrd, *authcrd;
-	uint32_t sid;
-	int error, needauth;
+	struct cryptodesc *crd, *enccrd;
+	int error;
 
 	error = 0;
 	enccrd = NULL;
-	authcrd = NULL;
-	needauth = 0;
 
 	/* Sanity check. */
 	if (crp == NULL)
 		return (EINVAL);
 
-	if (crp->crp_callback == NULL || crp->crp_desc == NULL)
-		return (EINVAL);
-
-	sid = ((uint32_t)crp->crp_sid) & 0xffffffff;
-	if (sid >= sc->nsessions)
-		return (EINVAL);
+	if (crp->crp_callback == NULL || crp->crp_desc == NULL) {
+		error = EINVAL;
+		goto out;
+	}
 
 	for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
 		switch (crd->crd_alg) {
@@ -301,51 +273,27 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
 			}
 			enccrd = crd;
 			break;
-
-		case CRYPTO_AES_RFC4106_GCM_16:
-			if (enccrd != NULL) {
-				error = EINVAL;
-				goto out;
-			}
-			enccrd = crd;
-			needauth = 1;
-			break;
-
-		case CRYPTO_AES_128_GMAC:
-		case CRYPTO_AES_192_GMAC:
-		case CRYPTO_AES_256_GMAC:
-			if (authcrd != NULL) {
-				error = EINVAL;
-				goto out;
-			}
-			authcrd = crd;
-			needauth = 1;
-			break;
-
 		default:
 			return (EINVAL);
 		}
 	}
-
-	if (enccrd == NULL || (needauth && authcrd == NULL)) {
+	if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
 		error = EINVAL;
 		goto out;
 	}
 
-	/* CBC & XTS can only handle full blocks for now */
-	if ((enccrd->crd_len == CRYPTO_AES_CBC || enccrd->crd_len ==
-	    CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
-		error = EINVAL;
-		goto out;
+	rw_rlock(&sc->lock);
+	TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
+		if (ses->id == (crp->crp_sid & 0xffffffff))
+			break;
 	}
-
-	ses = &sc->sessions[sid];
+	rw_runlock(&sc->lock);
 	if (ses == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
-	error = aesni_cipher_process(ses, enccrd, authcrd, crp);
+	error = aesni_cipher_process(ses, enccrd, crp);
 	if (error != 0)
 		goto out;
 
@@ -359,17 +307,13 @@ uint8_t *
 aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
     int *allocated)
 {
-	struct mbuf *m;
 	struct uio *uio;
 	struct iovec *iov;
 	uint8_t *addr;
 
-	if (crp->crp_flags & CRYPTO_F_IMBUF) {
-		m = (struct mbuf *)crp->crp_buf;
-		if (m->m_next != NULL)
-			goto alloc;
-		addr = mtod(m, uint8_t *);
-	} else if (crp->crp_flags & CRYPTO_F_IOV) {
+	if (crp->crp_flags & CRYPTO_F_IMBUF)
+		goto alloc;
+	else if (crp->crp_flags & CRYPTO_F_IOV) {
 		uio = (struct uio *)crp->crp_buf;
 		if (uio->uio_iovcnt != 1)
 			goto alloc;
@@ -378,7 +322,6 @@ aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
 	} else
 		addr = (u_char *)crp->crp_buf;
 	*allocated = 0;
-	addr += enccrd->crd_skip;
 	return (addr);
 
 alloc:
@@ -423,201 +366,83 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
 	int error;
 
 	td = curthread;
-	critical_enter();
 	error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
 	    FPU_KERN_KTHR);
-	if (error != 0) {
-		critical_exit();
+	if (error != 0)
 		return (error);
-	}
 	error = aesni_cipher_setup_common(ses, encini->cri_key,
 	    encini->cri_klen);
 	fpu_kern_leave(td, ses->fpu_ctx);
-	critical_exit();
 	return (error);
 }
 
-#ifdef AESNI_DEBUG
-static void
-aesni_printhexstr(uint8_t *ptr, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		printf("%02hhx", ptr[i]);
-}
-#endif
-
 static int
 aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
-    struct cryptodesc *authcrd, struct cryptop *crp)
+    struct cryptop *crp)
 {
-	uint8_t *tag;
-	uint8_t *iv;
 	struct thread *td;
-	uint8_t *buf, *authbuf;
-	int error, allocated, authallocated;
-	int ivlen, encflag, i;
-
-	encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT;
+	uint8_t *buf;
+	int error, allocated;
 
 	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
 	if (buf == NULL)
 		return (ENOMEM);
 
-	authbuf = NULL;
-	authallocated = 0;
-	if (authcrd != NULL) {
-		authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated);
-		if (authbuf == NULL) {
-			error = ENOMEM;
-			goto out1;
-		}
-		/* NOTE: GMAC_DIGEST_LEN == AES_BLOCK_LEN */
-		tag = authcrd->crd_iv;
-	}
-
-	iv = enccrd->crd_iv;
-	/* XXX - validate that enccrd and authcrd have/use same key? */
-	switch (enccrd->crd_alg) {
-	case CRYPTO_AES_CBC:
-		ivlen = 16;
-		break;
-	case CRYPTO_AES_XTS:
-		ivlen = 8;
-		break;
-	case CRYPTO_AES_RFC4106_GCM_16:
-		/* Be smart at determining the ivlen until better ways are present */
-		ivlen = enccrd->crd_skip - enccrd->crd_inject;
-		ivlen += 4;
-		break;
-	}
-
-	/* Setup ses->iv */
-	if (encflag) {
-		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
-			bcopy(enccrd->crd_iv, iv, ivlen);
-		else if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
-			if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
-				for (i = 0; i < AESCTR_NONCESIZE; i++)
-					iv[i] = ses->nonce[i];
-				/* XXX: Is this enough? */
-				u_long counter = atomic_fetchadd_long(&ses->aesgcmcounter, 1);
-				bcopy((void *)&counter, iv + AESCTR_NONCESIZE, sizeof(uint64_t));
-				crypto_copyback(crp->crp_flags, crp->crp_buf,
-				    enccrd->crd_inject, AESCTR_IVSIZE, iv + AESCTR_NONCESIZE);
-			} else {
-				arc4rand(iv, AES_BLOCK_LEN, 0);
-				crypto_copyback(crp->crp_flags, crp->crp_buf,
-				    enccrd->crd_inject, ivlen, iv);
-			}
-		}
-	} else {
-		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
-			bcopy(enccrd->crd_iv, iv, ivlen);
-		else {
-			if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
-				for (i = 0; i < AESCTR_NONCESIZE; i++)
-					iv[i] = ses->nonce[i];
-				crypto_copydata(crp->crp_flags, crp->crp_buf,
-				    enccrd->crd_inject, AESCTR_IVSIZE, iv + AESCTR_NONCESIZE);
-			} else
-				crypto_copydata(crp->crp_flags, crp->crp_buf,
-				    enccrd->crd_inject, ivlen, iv);
-		}
-	}
-#ifdef AESNI_DEBUG
-	aesni_printhexstr(iv, ivlen);
-	printf("\n");
-#endif
-
-	if (authcrd != NULL && !encflag) {
-		crypto_copydata(crp->crp_flags, crp->crp_buf,
-		    authcrd->crd_inject, GMAC_DIGEST_LEN, tag);
-	} else {
-#ifdef AESNI_DEBUG
-		printf("ptag: ");
-		aesni_printhexstr(tag, sizeof tag);
-		printf("\n");
-#endif
-		bzero(tag, sizeof tag);
-	}
-
 	td = curthread;
-
-	critical_enter();
 	error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
 	    FPU_KERN_KTHR);
-	if (error != 0) {
-		critical_exit();
+	if (error != 0)
 		goto out1;
+
+	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
+		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
+		    enccrd->crd_klen);
+		if (error != 0)
+			goto out;
 	}
-	/* Do work */
-	switch (ses->algo) {
-	case CRYPTO_AES_CBC:
-		if (encflag)
+
+	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
+		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
+			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
+			crypto_copyback(crp->crp_flags, crp->crp_buf,
+			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
+		if (ses->algo == CRYPTO_AES_CBC) {
 			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
-			    enccrd->crd_len, buf, buf, iv);
-		else
-			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
-			    enccrd->crd_len, buf, iv);
-		break;
-	case CRYPTO_AES_XTS:
-		if (encflag)
+			    enccrd->crd_len, buf, buf, ses->iv);
+		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
 			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
 			    ses->xts_schedule, enccrd->crd_len, buf, buf,
-			    iv);
+			    ses->iv);
+		}
+	} else {
+		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
+			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
 		else
+			crypto_copydata(crp->crp_flags, crp->crp_buf,
+			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
+		if (ses->algo == CRYPTO_AES_CBC) {
+			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
+			    enccrd->crd_len, buf, ses->iv);
+		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
 			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
 			    ses->xts_schedule, enccrd->crd_len, buf, buf,
-			    iv);
-		break;
-	case CRYPTO_AES_RFC4106_GCM_16:
-#ifdef AESNI_DEBUG
-		printf("GCM: %d\n", encflag);
-		printf("buf(%d): ", enccrd->crd_len);
-		aesni_printhexstr(buf, enccrd->crd_len);
-		printf("\nauthbuf(%d): ", authcrd->crd_len);
-		aesni_printhexstr(authbuf, authcrd->crd_len);
-		printf("\niv: ");
-		aesni_printhexstr(iv, ivlen);
-		printf("\ntag: ");
-		aesni_printhexstr(tag, 16);
-		printf("\nsched: ");
-		aesni_printhexstr(ses->enc_schedule, 16 * (ses->rounds + 1));
-		printf("\n");
-#endif
-		if (encflag)
-			AES_GCM_encrypt(buf, buf, authbuf, iv, tag,
-			    enccrd->crd_len, authcrd->crd_len, ivlen,
-			    ses->enc_schedule, ses->rounds);
-		else {
-			if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag,
-			    enccrd->crd_len, authcrd->crd_len, ivlen,
-			    ses->enc_schedule, ses->rounds))
-				error = EBADMSG;
+			    ses->iv);
 		}
-		break;
 	}
 
-	fpu_kern_leave(td, ses->fpu_ctx);
-	critical_exit();
-
 	if (allocated)
 		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
 		    enccrd->crd_len, buf);
-
-	if (!error && authcrd != NULL) {
-		crypto_copyback(crp->crp_flags, crp->crp_buf,
-		    authcrd->crd_inject, crp->crp_ilen - authcrd->crd_inject, tag);
-	}
-
+	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
+		crypto_copydata(crp->crp_flags, crp->crp_buf,
+		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
+		    AES_BLOCK_LEN, ses->iv);
+out:
+	fpu_kern_leave(td, ses->fpu_ctx);
out1:
 	if (allocated) {
 		bzero(buf, enccrd->crd_len);
 		free(buf, M_AESNI);
 	}
-	if (authallocated)
-		free(authbuf, M_AESNI);
-
 	return (error);
 }
diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h
index fbbbefb..ff1d1a2 100644
--- a/sys/crypto/aesni/aesni.h
+++ b/sys/crypto/aesni/aesni.h
@@ -56,9 +56,7 @@ struct aesni_session {
 	uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16);
 	uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16);
 	uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16);
-	/* AES-GCM needs a counter hence the separated enc/dec IV */
-	uint8_t nonce[4];
-	volatile uint64_t aesgcmcounter;
+	uint8_t iv[AES_BLOCK_LEN];
 	int algo;
 	int rounds;
 	/* uint8_t *ses_ictx; */
@@ -66,6 +64,7 @@ struct aesni_session {
 	/* int ses_mlen; */
 	int used;
 	uint32_t id;
+	TAILQ_ENTRY(aesni_session) next;
 	struct fpu_kern_ctx *fpu_ctx;
 };
 
@@ -97,16 +96,6 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
     const void *tweak_schedule /*__aligned(16)*/, size_t len,
     const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
 
-/* GCM & GHASH functions */
-void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
-    const unsigned char *addt, const unsigned char *ivec,
-    unsigned char *tag, int nbytes, int abytes, int ibytes,
-    const unsigned char *key, int nr);
-int AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
-    const unsigned char *addt, const unsigned char *ivec,
-    unsigned char *tag, int nbytes, int abytes, int ibytes,
-    const unsigned char *key, int nr);
-
 int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
     int keylen);
 uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c
deleted file mode 100644
index 16e6f53..0000000
--- a/sys/crypto/aesni/aesni_ghash.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*-
- * Copyright (c) 2014 The FreeBSD Foundation
- * All rights reserved.
- *
- * This software was developed by John-Mark Gurney under
- * the sponsorship from the FreeBSD Foundation and
- * Rubicon Communications, LLC (Netgate)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *
- * $Id$
- *
- */
-
-/*
- * Figure 5, 7, and 11 are copied from the Intel white paper: Intel®
- * Carry-Less Multiplication Instruction and its Usage for Computing the
- * GCM Mode
- *
- * and as such are: Copyright © 2010 Intel Corporation. All rights reserved.
- *
- * Please see white paper for complete license.
- */
-
-#ifdef _KERNEL
-#include <crypto/aesni/aesni.h>
-#else
-#include <stdint.h>
-#endif
-
-#include <wmmintrin.h>
-#include <emmintrin.h>
-#include <smmintrin.h>
-
-/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */
-
-#define REFLECT(X) \
-	hlp1 = _mm_srli_epi16(X,4);\
-	X = _mm_and_si128(AMASK,X);\
-	hlp1 = _mm_and_si128(AMASK,hlp1);\
-	X = _mm_shuffle_epi8(MASKH,X);\
-	hlp1 = _mm_shuffle_epi8(MASKL,hlp1);\
-	X = _mm_xor_si128(X,hlp1)
-
-static inline int
-m128icmp(__m128i a, __m128i b)
-{
-	__m128i cmp;
-
-	cmp = _mm_cmpeq_epi32(a, b);
-
-	return _mm_movemask_epi8(cmp) == 0xffff;
-}
-
-/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */
-static void
-gfmul_decrypt(__m128i a, __m128i b, __m128i * res)
-{
-	__m128i /* tmp0, tmp1, tmp2, */ tmp3, tmp4, tmp5, tmp6,
-		tmp7, tmp8, tmp9, tmp10, tmp11, tmp12;
-	__m128i XMMMASK = _mm_setr_epi32(0xffffffff, 0x0, 0x0, 0x0);
-
-	tmp3 = _mm_clmulepi64_si128(a, b, 0x00);
-	tmp6 = _mm_clmulepi64_si128(a, b, 0x11);
-	tmp4 = _mm_shuffle_epi32(a, 78);
-	tmp5 = _mm_shuffle_epi32(b, 78);
-	tmp4 = _mm_xor_si128(tmp4, a);
-	tmp5 = _mm_xor_si128(tmp5, b);
-	tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00);
-	tmp4 = _mm_xor_si128(tmp4, tmp3);
-	tmp4 = _mm_xor_si128(tmp4, tmp6);
-	tmp5 = _mm_slli_si128(tmp4, 8);
-	tmp4 = _mm_srli_si128(tmp4, 8);
-	tmp3 = _mm_xor_si128(tmp3, tmp5);
-	tmp6 = _mm_xor_si128(tmp6, tmp4);
-	tmp7 = _mm_srli_epi32(tmp6, 31);
-	tmp8 = _mm_srli_epi32(tmp6, 30);
-	tmp9 = _mm_srli_epi32(tmp6, 25);
-	tmp7 = _mm_xor_si128(tmp7, tmp8);
-	tmp7 = _mm_xor_si128(tmp7, tmp9);
-	tmp8 = _mm_shuffle_epi32(tmp7, 147);
-
-	tmp7 = _mm_and_si128(XMMMASK, tmp8);
-	tmp8 = _mm_andnot_si128(XMMMASK, tmp8);
-	tmp3 = _mm_xor_si128(tmp3, tmp8);
-	tmp6 = _mm_xor_si128(tmp6, tmp7);
-	tmp10 = _mm_slli_epi32(tmp6, 1);
-	tmp3 = _mm_xor_si128(tmp3, tmp10);
-	tmp11 = _mm_slli_epi32(tmp6, 2);
-	tmp3 = _mm_xor_si128(tmp3, tmp11);
-	tmp12 = _mm_slli_epi32(tmp6, 7);
-	tmp3 = _mm_xor_si128(tmp3, tmp12);
-
-	*res = _mm_xor_si128(tmp3, tmp6);
-}
-
-void
-AES_GCM_encrypt(const unsigned char *in,
-    unsigned char *out,
-    const unsigned char *addt,
-    const unsigned char *ivec,
-    unsigned char *tag,
-    int nbytes,
-    int abytes,
-    int ibytes,
-    const unsigned char *key,
-    int nr)
-{
-	int i , j, k;
-	__m128i hlp1 /* , hlp2, hlp3, hlp4 */ ;
-	__m128i tmp1 , tmp2, tmp3, tmp4;
-	__m128i H , T;
-	__m128i *KEY = (__m128i *) key;
-	__m128i ctr1 , ctr2, ctr3, ctr4;
-	__m128i last_block = _mm_setzero_si128();
-	__m128i ONE = _mm_set_epi32(0, 1, 0, 0);
-	__m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
-	__m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5,
-	    6, 7);
-	/*
-	 * __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-	 * 10, 11, 12, 13, 14, 15);
-	 */
-	__m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
-	__m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
-	__m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
-	__m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
-	__m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
-
-	if (ibytes == 96 / 8) {
-		Y = _mm_loadu_si128((__m128i *) ivec);
-		Y = _mm_insert_epi32(Y, 0x1000000, 3);
-		/* (Compute E[ZERO, KS] and E[Y0, KS] together */
-		tmp1 = _mm_xor_si128(X, KEY[0]);
-		tmp2 = _mm_xor_si128(Y, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
-		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		T = _mm_aesenclast_si128(tmp2, KEY[nr]);
-		REFLECT(H);
-	} else {
-		tmp1 = _mm_xor_si128(X, KEY[0]);
-		for (j = 1; j < nr; j++)
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		REFLECT(H);
-		Y = _mm_xor_si128(Y, Y);
-		for (i = 0; i < ibytes / 16; i++) {
-			tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
-			REFLECT(tmp1);
-			Y = _mm_xor_si128(Y, tmp1);
-			gfmul_decrypt(Y, H, &Y);
-		}
-		if (ibytes % 16) {
-			for (j = 0; j < ibytes % 16; j++)
-				((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
-			tmp1 = last_block;
-			REFLECT(tmp1);
-			Y = _mm_xor_si128(Y, tmp1);
-			gfmul_decrypt(Y, H, &Y);
-		}
-		tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
-		tmp1 = _mm_insert_epi64(tmp1, 0, 1);
-		REFLECT(tmp1);
-		tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
-		Y = _mm_xor_si128(Y, tmp1);
-		gfmul_decrypt(Y, H, &Y);
-		REFLECT(Y);
-		/* Compute E(K, Y0) */
-		tmp1 = _mm_xor_si128(Y, KEY[0]);
-		for (j = 1; j < nr; j++)
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-		T = _mm_aesenclast_si128(tmp1, KEY[nr]);
-	}
-
-	for (i = 0; i < abytes / 16; i++) {
-		tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	if (abytes % 16) {
-		last_block = _mm_setzero_si128();
-		for (j = 0; j < abytes % 16; j++)
-			((unsigned char *)&last_block)[j] = addt[i * 16 + j];
-		tmp1 = last_block;
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
-	ctr1 = _mm_add_epi64(ctr1, ONE);
-	ctr2 = _mm_add_epi64(ctr1, ONE);
-	ctr3 = _mm_add_epi64(ctr2, ONE);
-	ctr4 = _mm_add_epi64(ctr3, ONE);
-	for (i = 0; i < nbytes / 16 / 4; i++) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
-		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
-		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
-		ctr1 = _mm_add_epi64(ctr1, FOUR);
-		ctr2 = _mm_add_epi64(ctr2, FOUR);
-		ctr3 = _mm_add_epi64(ctr3, FOUR);
-		ctr4 = _mm_add_epi64(ctr4, FOUR);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		tmp2 = _mm_xor_si128(tmp2, KEY[0]);
-		tmp3 = _mm_xor_si128(tmp3, KEY[0]);
-		tmp4 = _mm_xor_si128(tmp4, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
-			tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
-			tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
-			tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
-			tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
-		tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
-		tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
-		tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
-		tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
-		tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
-		tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
-		tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
-		REFLECT(tmp1);
-		REFLECT(tmp2);
-		REFLECT(tmp3);
-		REFLECT(tmp4);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-		X = _mm_xor_si128(X, tmp2);
-		gfmul_decrypt(X, H, &X);
-		X = _mm_xor_si128(X, tmp3);
-		gfmul_decrypt(X, H, &X);
-		X = _mm_xor_si128(X, tmp4);
-		gfmul_decrypt(X, H, &X);
-	}
-	for (k = i * 4; k < nbytes / 16; k++) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		ctr1 = _mm_add_epi64(ctr1, ONE);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
-		_mm_storeu_si128(&((__m128i *) out)[k], tmp1);
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	//If one partial block remains
-	if (nbytes % 16) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
-		last_block = tmp1;
-		for (j = 0; j < nbytes % 16; j++)
-			out[k * 16 + j] = ((unsigned char *)&last_block)[j];
-		for (; j < 16; j++)
-			((unsigned char *)&last_block)[j] = 0;
-		tmp1 = last_block;
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
-	tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
-	REFLECT(tmp1);
-	tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
-	X = _mm_xor_si128(X, tmp1);
-	gfmul_decrypt(X, H, &X);
-	REFLECT(X);
-	T = _mm_xor_si128(X, T);
-	_mm_storeu_si128((__m128i *) tag, T);
-}
-
-int
-AES_GCM_decrypt(const unsigned char *in,
-    unsigned char *out,
-    const unsigned char *addt,
-    const unsigned char *ivec,
-    unsigned char *tag,
-    int nbytes,
-    int abytes,
-    int ibytes,
-    const unsigned char *key,
-    int nr)
-{
-	int i , j, k;
-	__m128i hlp1 /* , hlp2, hlp3, hlp4 */ ;
-	__m128i tmp1 , tmp2, tmp3, tmp4;
-	__m128i H , T;
-	__m128i *KEY = (__m128i *) key;
-	__m128i ctr1 , ctr2, ctr3, ctr4;
-	__m128i last_block = _mm_setzero_si128();
-	__m128i ONE = _mm_set_epi32(0, 1, 0, 0);
-	__m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
-	__m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5,
-	    6, 7);
-	__m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
-	    14, 15);
-	__m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
-	__m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
-	__m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
-	__m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
-	__m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
-
-	if (ibytes == 96 / 8) {
-		Y = _mm_loadu_si128((__m128i *) ivec);
-		Y = _mm_insert_epi32(Y, 0x1000000, 3);
-		/* (Compute E[ZERO, KS] and E[Y0, KS] together */
-		tmp1 = _mm_xor_si128(X, KEY[0]);
-		tmp2 = _mm_xor_si128(Y, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
-		};
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
-		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		T = _mm_aesenclast_si128(tmp2, KEY[nr]);
-		REFLECT(H);
-	} else {
-		tmp1 = _mm_xor_si128(X, KEY[0]);
-		for (j = 1; j < nr; j++)
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		REFLECT(H);
-		Y = _mm_xor_si128(Y, Y);
-		for (i = 0; i < ibytes / 16; i++) {
-			tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
-			REFLECT(tmp1);
-			Y = _mm_xor_si128(Y, tmp1);
-			gfmul_decrypt(Y, H, &Y);
-		}
-		if (ibytes % 16) {
-			for (j = 0; j < ibytes % 16; j++)
-				((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
-			tmp1 = last_block;
-			REFLECT(tmp1);
-			Y = _mm_xor_si128(Y, tmp1);
-			gfmul_decrypt(Y, H, &Y);
-		}
-		tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
-		tmp1 = _mm_insert_epi64(tmp1, 0, 1);
-		REFLECT(tmp1);
-		tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
-		Y = _mm_xor_si128(Y, tmp1);
-		gfmul_decrypt(Y, H, &Y);
-		REFLECT(Y);
-		/* Compute E(K, Y0) */
-		tmp1 = _mm_xor_si128(Y, KEY[0]);
-		for (j = 1; j < nr; j++)
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-		T = _mm_aesenclast_si128(tmp1, KEY[nr]);
-	}
-	for (i = 0; i < abytes / 16; i++) {
-		tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	if (abytes % 16) {
-		last_block = _mm_setzero_si128();
-		for (j = 0; j < abytes % 16; j++)
-			((unsigned char *)&last_block)[j] = addt[i * 16 + j];
-		tmp1 = last_block;
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	for (i = 0; i < nbytes / 16; i++) {
-		tmp1 = _mm_loadu_si128(&((__m128i *) in)[i]);
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	if (nbytes % 16) {
-		last_block = _mm_setzero_si128();
-		for (j = 0; j < nbytes % 16; j++)
-			((unsigned char *)&last_block)[j] = in[i * 16 + j];
-		tmp1 = last_block;
-		REFLECT(tmp1);
-		X = _mm_xor_si128(X, tmp1);
-		gfmul_decrypt(X, H, &X);
-	}
-	tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
-	tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
-	REFLECT(tmp1);
-	tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
-	X = _mm_xor_si128(X, tmp1);
-	gfmul_decrypt(X, H, &X);
-	REFLECT(X);
-	T = _mm_xor_si128(X, T);
-	if (!m128icmp(T, _mm_loadu_si128((__m128i*)tag)))
-		return 0;	//in case the authentication failed
-
-	ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
-	ctr1 = _mm_add_epi64(ctr1, ONE);
-	ctr2 = _mm_add_epi64(ctr1, ONE);
-	ctr3 = _mm_add_epi64(ctr2, ONE);
-	ctr4 = _mm_add_epi64(ctr3, ONE);
-	for (i = 0; i < nbytes / 16 / 4; i++) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
-		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
-		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
-		ctr1 = _mm_add_epi64(ctr1, FOUR);
-		ctr2 = _mm_add_epi64(ctr2, FOUR);
-		ctr3 = _mm_add_epi64(ctr3, FOUR);
-		ctr4 = _mm_add_epi64(ctr4, FOUR);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		tmp2 = _mm_xor_si128(tmp2, KEY[0]);
-		tmp3 = _mm_xor_si128(tmp3, KEY[0]);
-		tmp4 = _mm_xor_si128(tmp4, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
-			tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
-			tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
-			tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
-			tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
-		tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
-		tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
-		tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
-		tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
-		tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
-		tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
-		tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
-		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
-		tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
-		tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
-		tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
-		tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
-	}
-	for (k = i * 4; k < nbytes / 16; k++) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		ctr1 = _mm_add_epi64(ctr1, ONE);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
-		_mm_storeu_si128(&((__m128i *) out)[k], tmp1);
-	}
-	//If one partial block remains
-	if (nbytes % 16) {
-		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
-		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
-		for (j = 1; j < nr - 1; j += 2) {
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
-			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
-		}
-		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
-		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
-		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
-		last_block = tmp1;
-		for (j = 0; j < nbytes % 16; j++)
-			out[k * 16 + j] = ((unsigned char *)&last_block)[j];
-	}
-	return 1;
-	//when sucessfull returns 1
-}
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 83d79fc..39819a6 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -176,6 +176,10 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
 	}
 }
 
+#define	AES_XTS_BLOCKSIZE	16
+#define	AES_XTS_IVSIZE		8
+#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
+
 static inline __m128i
 xts_crank_lfsr(__m128i inp)
 {
@@ -343,23 +347,6 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 			ses->rounds = AES256_ROUNDS;
 			break;
 		default:
-			printf("invalid CBC/GCM key length");
-			return (EINVAL);
-		}
-		break;
-	case CRYPTO_AES_RFC4106_GCM_16:
-		switch (keylen) {
-		case 160:
-			ses->rounds = AES128_ROUNDS;
-			break;
-		case 224:
-			ses->rounds = AES192_ROUNDS;
-			break;
-		case 288:
-			ses->rounds = AES256_ROUNDS;
-			break;
-		default:
-			printf("invalid CBC/GCM key length");
 			return (EINVAL);
 		}
 		break;
@@ -372,7 +359,6 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 			ses->rounds = AES256_ROUNDS;
 			break;
 		default:
-			printf("invalid XTS key length");
 			return (EINVAL);
 		}
 		break;
@@ -382,20 +368,11 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 
 	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
 	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
-
-	/* setup IV */
-	switch (ses->algo) {
-	case CRYPTO_AES_CBC:
-		/* Nothing todo */
-		break;
-	case CRYPTO_AES_RFC4106_GCM_16:
-		bcopy(key + ((keylen - 32) / 8), ses->nonce, AESCTR_NONCESIZE);
-		arc4rand((void *)&ses->aesgcmcounter, sizeof(uint64_t), 0);
-		break;
-	case CRYPTO_AES_XTS:
+	if (ses->algo == CRYPTO_AES_CBC)
+		arc4rand(ses->iv, sizeof(ses->iv), 0);
+	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
 		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
 		    ses->rounds);
-		break;
 	}
 
 	return (0);
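
The final aesni_wrap.c hunk restores the AES_XTS_ALPHA definition (0x87) consumed by xts_crank_lfsr: the constant used to reduce the product when the XTS tweak is multiplied by x in GF(2^128). As a worked illustration of that math only (a plain-C sketch with a hypothetical `xts_crank_tweak` helper, not the driver's SSE implementation):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define AES_XTS_BLOCKSIZE 16
#define AES_XTS_ALPHA 0x87	/* GF(2^128) generator polynomial */

/*
 * Multiply the 128-bit tweak by x (alpha) in GF(2^128), little-endian
 * bit order: shift the whole value left by one bit and, if a bit
 * carries out of the top byte, fold it back in by XORing 0x87 into
 * the low byte.
 */
static void
xts_crank_tweak(uint8_t tweak[AES_XTS_BLOCKSIZE])
{
	int carry = 0, i;

	for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
		int c = tweak[i] >> 7;	/* bit that shifts into next byte */

		tweak[i] = (uint8_t)((tweak[i] << 1) | carry);
		carry = c;
	}
	if (carry)
		tweak[0] ^= AES_XTS_ALPHA;
}

int
main(void)
{
	uint8_t tweak[AES_XTS_BLOCKSIZE];
	int i;

	memset(tweak, 0, sizeof(tweak));
	tweak[15] = 0x80;	/* bit 127 set: the shift wraps and folds */
	xts_crank_tweak(tweak);
	for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
		printf("%02x", tweak[i]);
	printf("\n");	/* prints 87 followed by fifteen 00 bytes */
	return (0);
}
```
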