author     Renato Botelho <renato@netgate.com>    2015-08-17 13:53:22 -0300
committer  Renato Botelho <renato@netgate.com>    2015-08-17 13:53:22 -0300
commit     86163f54d3288d43997b0766d4c2538ed7f70b17 (patch)
tree       f3aed79ce63c2fb0eea1b2595b71b007d200ed43
parent     46e99a8858f1c843c1774e472c11d422ca2163ae (diff)
Importing pfSense patch aesgcm.hwaccl.diff
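
For context, the patch's new AES_GCM_encrypt()/AES_GCM_decrypt() entry points (declared in aesni.h below) are plain C, and aesni_ghash.c is written to build outside the kernel as well: its #ifdef _KERNEL guard falls back to <stdint.h>. The following userland smoke test is a hypothetical sketch, not part of the patch — the file name test_gcm.c, the key-expansion helper, and all test values are assumptions. In the kernel the driver passes ses->enc_schedule and ses->rounds; here an AES-128 schedule is expanded inline with the usual AESKEYGENASSIST idiom, so nr is 10.

/*
 * Hypothetical smoke test for the imported GCM routines (not part of
 * this patch).  Assumed build:
 *     cc -O2 -maes -mpclmul -msse4.1 test_gcm.c aesni_ghash.c
 */
#include <stdio.h>
#include <string.h>
#include <wmmintrin.h>

/* Prototypes as added to aesni.h by this patch. */
void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
    const unsigned char *addt, const unsigned char *ivec,
    unsigned char *tag, int nbytes, int abytes, int ibytes,
    const unsigned char *key, int nr);
int AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
    const unsigned char *addt, const unsigned char *ivec,
    unsigned char *tag, int nbytes, int abytes, int ibytes,
    const unsigned char *key, int nr);

/* Standard AES-128 key-expansion step (an assumption, not from the patch). */
static __m128i
expand_step(__m128i key, __m128i gen)
{
	gen = _mm_shuffle_epi32(gen, _MM_SHUFFLE(3, 3, 3, 3));
	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
	return (_mm_xor_si128(key, gen));
}
#define	EXPAND(i, rcon)						\
	sched[i] = expand_step(sched[i - 1],			\
	    _mm_aeskeygenassist_si128(sched[i - 1], rcon))

int
main(void)
{
	__m128i sched[11];	/* initial key + 10 round keys */
	unsigned char key[16] = "0123456789abcdef";
	/*
	 * 12-byte nonce in a 16-byte buffer: the 96-bit fast path loads a
	 * full 16 bytes from ivec and overwrites the counter word itself.
	 */
	unsigned char iv[16] = "nonce-12byte";
	unsigned char aad[13] = "header bytes!";
	unsigned char pt[32] = "plaintext payload, 32 bytes pad";
	unsigned char ct[32], dec[32], tag[16];

	sched[0] = _mm_loadu_si128((const __m128i *)key);
	EXPAND(1, 0x01); EXPAND(2, 0x02); EXPAND(3, 0x04); EXPAND(4, 0x08);
	EXPAND(5, 0x10); EXPAND(6, 0x20); EXPAND(7, 0x40); EXPAND(8, 0x80);
	EXPAND(9, 0x1b); EXPAND(10, 0x36);

	/* "key" is the expanded schedule; nr == 10 for AES-128. */
	AES_GCM_encrypt(pt, ct, aad, iv, tag, sizeof(pt), sizeof(aad), 12,
	    (const unsigned char *)sched, 10);

	if (!AES_GCM_decrypt(ct, dec, aad, iv, tag, sizeof(ct), sizeof(aad),
	    12, (const unsigned char *)sched, 10)) {
		fprintf(stderr, "tag verification failed\n");
		return (1);
	}
	printf("roundtrip %s\n",
	    memcmp(pt, dec, sizeof(pt)) == 0 ? "ok" : "MISMATCH");
	return (0);
}

Note that in-kernel RFC4106 sessions register key lengths of 160/224/288 bits, because the trailing 4 key bytes become the ses->nonce salt (see aesni_cipher_setup_common in the aesni_wrap.c hunk below).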
-rw-r--r--  sys/crypto/aesni/aesni.c        | 401
-rw-r--r--  sys/crypto/aesni/aesni.h        |  15
-rw-r--r--  sys/crypto/aesni/aesni_ghash.c  | 523
-rw-r--r--  sys/crypto/aesni/aesni_wrap.c   |  37
-rw-r--r--  sys/modules/aesni/Makefile      |   8
5 files changed, 862 insertions, 122 deletions
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
index 7d7a740..e1bd5e8 100644
--- a/sys/crypto/aesni/aesni.c
+++ b/sys/crypto/aesni/aesni.c
@@ -39,14 +39,15 @@ __FBSDID("$FreeBSD$");
 #include <sys/rwlock.h>
 #include <sys/bus.h>
 #include <sys/uio.h>
+#include <sys/mbuf.h>
 
 #include <crypto/aesni/aesni.h>
 #include <cryptodev_if.h>
+#include <opencrypto/gmac.h>
 
 struct aesni_softc {
 	int32_t cid;
-	uint32_t sid;
-	TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions;
-	struct rwlock lock;
+	volatile uint32_t nsessions;
+	struct aesni_session *sessions;
 };
 
 static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri);
@@ -56,7 +57,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc,
     struct aesni_session *ses);
 static int aesni_cipher_setup(struct aesni_session *ses,
     struct cryptoini *encini);
 static int aesni_cipher_process(struct aesni_session *ses,
-    struct cryptodesc *enccrd, struct cryptop *crp);
+    struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp);
 
 MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data");
 
@@ -79,12 +80,12 @@ aesni_probe(device_t dev)
 		return (EINVAL);
 	}
 
-	if ((cpu_feature & CPUID_SSE2) == 0) {
-		device_printf(dev, "No SSE2 support but AESNI!?!\n");
+	if ((cpu_feature & CPUID2_SSE41) == 0 && (cpu_feature2 & CPUID2_SSE41) == 0) {
+		device_printf(dev, "No SSE4.1 support.\n");
 		return (EINVAL);
 	}
 
-	device_set_desc_copy(dev, "AES-CBC,AES-XTS");
+	device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM");
 	return (0);
 }
 
@@ -94,8 +95,6 @@ aesni_attach(device_t dev)
 	struct aesni_softc *sc;
 
 	sc = device_get_softc(dev);
-	TAILQ_INIT(&sc->sessions);
-	sc->sid = 1;
 	sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE |
 	    CRYPTOCAP_F_SYNC);
 	if (sc->cid < 0) {
@@ -103,9 +102,16 @@ aesni_attach(device_t dev)
 		return (ENOMEM);
 	}
 
-	rw_init(&sc->lock, "aesni_lock");
+	sc->nsessions = 32;
+	sc->sessions = malloc(sc->nsessions * sizeof(struct aesni_session),
+	    M_AESNI, M_WAITOK | M_ZERO);
+
 	crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0);
 	crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0);
+	crypto_register(sc->cid, CRYPTO_AES_RFC4106_GCM_16, 0, 0);
+	crypto_register(sc->cid, CRYPTO_AES_128_GMAC, 0, 0);
+	crypto_register(sc->cid, CRYPTO_AES_192_GMAC, 0, 0);
+	crypto_register(sc->cid, CRYPTO_AES_256_GMAC, 0, 0);
 	return (0);
 }
 
@@ -114,25 +120,24 @@ aesni_detach(device_t dev)
 {
 	struct aesni_softc *sc;
 	struct aesni_session *ses;
+	int i;
 
 	sc = device_get_softc(dev);
-	rw_wlock(&sc->lock);
-	TAILQ_FOREACH(ses, &sc->sessions, next) {
+	for (i = 0; i < sc->nsessions; i++) {
+		ses = &sc->sessions[i];
 		if (ses->used) {
-			rw_wunlock(&sc->lock);
 			device_printf(dev,
 			    "Cannot detach, sessions still active.\n");
 			return (EBUSY);
 		}
 	}
-	while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) {
-		TAILQ_REMOVE(&sc->sessions, ses, next);
-		fpu_kern_free_ctx(ses->fpu_ctx);
-		free(ses, M_AESNI);
-	}
-	rw_wunlock(&sc->lock);
-	rw_destroy(&sc->lock);
 	crypto_unregister_all(sc->cid);
+	for (i = 0; i < sc->nsessions; i++) {
+		ses = &sc->sessions[i];
+		if (ses->fpu_ctx != NULL)
+			fpu_kern_free_ctx(ses->fpu_ctx);
+	}
+	free(sc->sessions, M_AESNI);
 	return (0);
 }
 
@@ -142,10 +147,12 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
 	struct aesni_softc *sc;
 	struct aesni_session *ses;
 	struct cryptoini *encini;
-	int error;
+	int error, sessn;
 
-	if (sidp == NULL || cri == NULL)
+	if (sidp == NULL || cri == NULL) {
+		printf("no sidp or cri");
 		return (EINVAL);
+	}
 
 	sc = device_get_softc(dev);
 	ses = NULL;
@@ -153,55 +160,76 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
 	for (; cri != NULL; cri = cri->cri_next) {
 		switch (cri->cri_alg) {
 		case CRYPTO_AES_CBC:
+			if (encini != NULL) {
+				printf("encini already set");
+				return (EINVAL);
+			}
+			encini = cri;
+			break;
 		case CRYPTO_AES_XTS:
-			if (encini != NULL)
+		case CRYPTO_AES_RFC4106_GCM_16:
+			if (encini != NULL) {
+				printf("encini already set");
 				return (EINVAL);
-			encini = cri;
+			}
+			encini = cri;
+			break;
+		case CRYPTO_AES_128_GMAC:
+		case CRYPTO_AES_192_GMAC:
+		case CRYPTO_AES_256_GMAC:
+			/*
+			 * nothing to do here, maybe in the future cache some
+			 * values for GHASH
+			 */
 			break;
 		default:
+			printf("unhandled algorithm");
 			return (EINVAL);
 		}
 	}
-	if (encini == NULL)
+	if (encini == NULL) {
+		printf("no cipher");
 		return (EINVAL);
+	}
 
-	rw_wlock(&sc->lock);
-	/*
-	 * Free sessions goes first, so if first session is used, we need to
-	 * allocate one.
-	 */
-	ses = TAILQ_FIRST(&sc->sessions);
-	if (ses == NULL || ses->used) {
-		ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO);
+	for (sessn = 1; sessn < sc->nsessions; sessn++) {
+		if (!sc->sessions[sessn].used) {
+			ses = &sc->sessions[sessn];
+			break;
+		}
+	}
+	if (ses == NULL) {
+		ses = malloc(sizeof(*ses) * sc->nsessions * 2, M_AESNI,
+		    M_NOWAIT | M_ZERO);
 		if (ses == NULL) {
-			rw_wunlock(&sc->lock);
+			sc->sessions = ses;
 			return (ENOMEM);
 		}
+		bcopy((void *)sc->sessions, (void *)ses,
+		    sc->nsessions * sizeof(*ses));
+		atomic_set_ptr((u_long *)sc->sessions, (u_long)ses);
+		bzero((void *)ses, sc->nsessions * sizeof(*ses));
+		ses = &sc->sessions[sc->nsessions];
+		ses->id = sc->nsessions;
+		atomic_add_int(&sc->nsessions, 1);
+	} else if (ses->id == 0)
+		ses->id = sessn;
+
+	if (ses->fpu_ctx == NULL) {
 		ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL |
 		    FPU_KERN_NOWAIT);
-		if (ses->fpu_ctx == NULL) {
-			free(ses, M_AESNI);
-			rw_wunlock(&sc->lock);
+		if (ses->fpu_ctx == NULL)
 			return (ENOMEM);
-		}
-		ses->id = sc->sid++;
-	} else {
-		TAILQ_REMOVE(&sc->sessions, ses, next);
 	}
-	ses->used = 1;
-	TAILQ_INSERT_TAIL(&sc->sessions, ses, next);
-	rw_wunlock(&sc->lock);
 	ses->algo = encini->cri_alg;
 
 	error = aesni_cipher_setup(ses, encini);
 	if (error != 0) {
-		rw_wlock(&sc->lock);
+		printf("setup failed");
 		aesni_freesession_locked(sc, ses);
-		rw_wunlock(&sc->lock);
 		return (error);
 	}
-
+	ses->used = 1;
 	*sidp = ses->id;
+
 	return (0);
 }
 
@@ -212,12 +240,10 @@ aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses)
 	uint32_t sid;
 
 	sid = ses->id;
-	TAILQ_REMOVE(&sc->sessions, ses, next);
 	ctx = ses->fpu_ctx;
 	bzero(ses, sizeof(*ses));
 	ses->id = sid;
 	ses->fpu_ctx = ctx;
-	TAILQ_INSERT_HEAD(&sc->sessions, ses, next);
 }
 
 static int
@@ -229,17 +255,14 @@ aesni_freesession(device_t dev, uint64_t tid)
 
 	sc = device_get_softc(dev);
 	sid = ((uint32_t)tid) & 0xffffffff;
-	rw_wlock(&sc->lock);
-	TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
-		if (ses->id == sid)
-			break;
-	}
-	if (ses == NULL) {
-		rw_wunlock(&sc->lock);
+	if (sid >= sc->nsessions)
 		return (EINVAL);
-	}
+
+	ses = &sc->sessions[sid];
+	if (ses == NULL)
+		return (EINVAL);
+
 	aesni_freesession_locked(sc, ses);
-	rw_wunlock(&sc->lock);
 	return (0);
 }
 
@@ -248,20 +271,25 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
 {
 	struct aesni_softc *sc = device_get_softc(dev);
 	struct aesni_session *ses = NULL;
-	struct cryptodesc *crd, *enccrd;
-	int error;
+	struct cryptodesc *crd, *enccrd, *authcrd;
+	uint32_t sid;
+	int error, needauth;
 
 	error = 0;
 	enccrd = NULL;
+	authcrd = NULL;
+	needauth = 0;
 
 	/* Sanity check. */
 	if (crp == NULL)
 		return (EINVAL);
 
-	if (crp->crp_callback == NULL || crp->crp_desc == NULL) {
-		error = EINVAL;
-		goto out;
-	}
+	if (crp->crp_callback == NULL || crp->crp_desc == NULL)
+		return (EINVAL);
+
+	sid = ((uint32_t)crp->crp_sid) & 0xffffffff;
+	if (sid >= sc->nsessions)
+		return (EINVAL);
 
 	for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
 		switch (crd->crd_alg) {
@@ -273,27 +301,51 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
 			}
 			enccrd = crd;
 			break;
+
+		case CRYPTO_AES_RFC4106_GCM_16:
+			if (enccrd != NULL) {
+				error = EINVAL;
+				goto out;
+			}
+			enccrd = crd;
+			needauth = 1;
+			break;
+
+		case CRYPTO_AES_128_GMAC:
+		case CRYPTO_AES_192_GMAC:
+		case CRYPTO_AES_256_GMAC:
+			if (authcrd != NULL) {
+				error = EINVAL;
+				goto out;
+			}
+			authcrd = crd;
+			needauth = 1;
+			break;
+
 		default:
 			return (EINVAL);
 		}
 	}
-	if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+
+	if (enccrd == NULL || (needauth && authcrd == NULL)) {
 		error = EINVAL;
 		goto out;
 	}
 
-	rw_rlock(&sc->lock);
-	TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
-		if (ses->id == (crp->crp_sid & 0xffffffff))
-			break;
+	/* CBC & XTS can only handle full blocks for now */
+	if ((enccrd->crd_len == CRYPTO_AES_CBC || enccrd->crd_len ==
+	    CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+		error = EINVAL;
+		goto out;
 	}
-	rw_runlock(&sc->lock);
+
+	ses = &sc->sessions[sid];
 	if (ses == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
-	error = aesni_cipher_process(ses, enccrd, crp);
+	error = aesni_cipher_process(ses, enccrd, authcrd, crp);
 	if (error != 0)
 		goto out;
 
@@ -307,13 +359,17 @@ uint8_t *
 aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
     int *allocated)
 {
+	struct mbuf *m;
 	struct uio *uio;
 	struct iovec *iov;
 	uint8_t *addr;
 
-	if (crp->crp_flags & CRYPTO_F_IMBUF)
-		goto alloc;
-	else if (crp->crp_flags & CRYPTO_F_IOV) {
+	if (crp->crp_flags & CRYPTO_F_IMBUF) {
+		m = (struct mbuf *)crp->crp_buf;
+		if (m->m_next != NULL)
+			goto alloc;
+		addr = mtod(m, uint8_t *);
+	} else if (crp->crp_flags & CRYPTO_F_IOV) {
 		uio = (struct uio *)crp->crp_buf;
 		if (uio->uio_iovcnt != 1)
 			goto alloc;
@@ -322,6 +378,7 @@ aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
 	} else
 		addr = (u_char *)crp->crp_buf;
 	*allocated = 0;
+	addr += enccrd->crd_skip;
 	return (addr);
 
 alloc:
@@ -366,83 +423,201 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
 	int error;
 
 	td = curthread;
+	critical_enter();
 	error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
 	    FPU_KERN_KTHR);
-	if (error != 0)
+	if (error != 0) {
+		critical_exit();
 		return (error);
+	}
 	error = aesni_cipher_setup_common(ses, encini->cri_key,
 	    encini->cri_klen);
 	fpu_kern_leave(td, ses->fpu_ctx);
+	critical_exit();
 	return (error);
 }
 
+#ifdef AESNI_DEBUG
+static void
+aesni_printhexstr(uint8_t *ptr, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		printf("%02hhx", ptr[i]);
+}
+#endif
+
 static int
 aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
-    struct cryptop *crp)
+    struct cryptodesc *authcrd, struct cryptop *crp)
 {
+	uint8_t *tag;
+	uint8_t *iv;
 	struct thread *td;
-	uint8_t *buf;
-	int error, allocated;
+	uint8_t *buf, *authbuf;
+	int error, allocated, authallocated;
+	int ivlen, encflag, i;
+
+	encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT;
 
 	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
 	if (buf == NULL)
 		return (ENOMEM);
 
-	td = curthread;
-	error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
-	    FPU_KERN_KTHR);
-	if (error != 0)
-		goto out1;
+	authbuf = NULL;
+	authallocated = 0;
+	if (authcrd != NULL) {
+		authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated);
+		if (authbuf == NULL) {
+			error = ENOMEM;
+			goto out1;
+		}
+		/* NOTE: GMAC_DIGEST_LEN == AES_BLOCK_LEN */
+		tag = authcrd->crd_iv;
+	}
 
-	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
-		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
-		    enccrd->crd_klen);
-		if (error != 0)
-			goto out;
+	iv = enccrd->crd_iv;
+	/* XXX - validate that enccrd and authcrd have/use same key? */
+	switch (enccrd->crd_alg) {
+	case CRYPTO_AES_CBC:
+		ivlen = 16;
+		break;
+	case CRYPTO_AES_XTS:
+		ivlen = 8;
+		break;
+	case CRYPTO_AES_RFC4106_GCM_16:
+		/* Be smart at determining the ivlen until better ways are present */
+		ivlen = enccrd->crd_skip - enccrd->crd_inject;
+		ivlen += 4;
+		break;
 	}
 
-	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
+	/* Setup ses->iv */
+	if (encflag) {
 		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
-			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
-		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
-			crypto_copyback(crp->crp_flags, crp->crp_buf,
-			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
-		if (ses->algo == CRYPTO_AES_CBC) {
-			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
-			    enccrd->crd_len, buf, buf, ses->iv);
-		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
-			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
-			    ses->xts_schedule, enccrd->crd_len, buf, buf,
-			    ses->iv);
+			bcopy(enccrd->crd_iv, iv, ivlen);
+		else if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
+			if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
+				for (i = 0; i < AESCTR_NONCESIZE; i++)
+					iv[i] = ses->nonce[i];
+				/* XXX: Is this enough? */
+				u_long counter = atomic_fetchadd_long(&ses->aesgcmcounter, 1);
+				bcopy((void *)&counter, iv + AESCTR_NONCESIZE,
+				    sizeof(uint64_t));
+				crypto_copyback(crp->crp_flags, crp->crp_buf,
+				    enccrd->crd_inject, AESCTR_IVSIZE,
+				    iv + AESCTR_NONCESIZE);
+			} else {
+				arc4rand(iv, AES_BLOCK_LEN, 0);
+				crypto_copyback(crp->crp_flags, crp->crp_buf,
+				    enccrd->crd_inject, ivlen, iv);
+			}
 		}
 	} else {
 		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
-			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+			bcopy(enccrd->crd_iv, iv, ivlen);
+		else {
+			if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
+				for (i = 0; i < AESCTR_NONCESIZE; i++)
+					iv[i] = ses->nonce[i];
+				crypto_copydata(crp->crp_flags, crp->crp_buf,
+				    enccrd->crd_inject, AESCTR_IVSIZE,
+				    iv + AESCTR_NONCESIZE);
+			} else
+				crypto_copydata(crp->crp_flags, crp->crp_buf,
+				    enccrd->crd_inject, ivlen, iv);
+		}
+	}
+#ifdef AESNI_DEBUG
+	aesni_printhexstr(iv, ivlen);
+	printf("\n");
+#endif
+
+	if (authcrd != NULL && !encflag) {
+		crypto_copydata(crp->crp_flags, crp->crp_buf,
+		    authcrd->crd_inject, GMAC_DIGEST_LEN, tag);
+	} else {
+#ifdef AESNI_DEBUG
+		printf("ptag: ");
+		aesni_printhexstr(tag, sizeof tag);
+		printf("\n");
+#endif
+		bzero(tag, sizeof tag);
+	}
+
+	td = curthread;
+
+	critical_enter();
+	error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
+	    FPU_KERN_KTHR);
+	if (error != 0) {
+		critical_exit();
+		goto out1;
+	}
+
+	/* Do work */
+	switch (ses->algo) {
+	case CRYPTO_AES_CBC:
+		if (encflag)
+			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
+			    enccrd->crd_len, buf, buf, iv);
 		else
-			crypto_copydata(crp->crp_flags, crp->crp_buf,
-			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
-		if (ses->algo == CRYPTO_AES_CBC) {
 			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
-			    enccrd->crd_len, buf, ses->iv);
-		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+			    enccrd->crd_len, buf, iv);
+		break;
+	case CRYPTO_AES_XTS:
+		if (encflag)
+			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
+			    ses->xts_schedule, enccrd->crd_len, buf, buf,
+			    iv);
+		else
 			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
 			    ses->xts_schedule, enccrd->crd_len, buf, buf,
-			    ses->iv);
+			    iv);
+		break;
+	case CRYPTO_AES_RFC4106_GCM_16:
+#ifdef AESNI_DEBUG
+		printf("GCM: %d\n", encflag);
+		printf("buf(%d): ", enccrd->crd_len);
+		aesni_printhexstr(buf, enccrd->crd_len);
+		printf("\nauthbuf(%d): ", authcrd->crd_len);
+		aesni_printhexstr(authbuf, authcrd->crd_len);
+		printf("\niv: ");
+		aesni_printhexstr(iv, ivlen);
+		printf("\ntag: ");
+		aesni_printhexstr(tag, 16);
+		printf("\nsched: ");
+		aesni_printhexstr(ses->enc_schedule, 16 * (ses->rounds + 1));
+		printf("\n");
+#endif
+		if (encflag)
+			AES_GCM_encrypt(buf, buf, authbuf, iv, tag,
+			    enccrd->crd_len, authcrd->crd_len, ivlen,
+			    ses->enc_schedule, ses->rounds);
+		else {
+			if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag,
+			    enccrd->crd_len, authcrd->crd_len, ivlen,
+			    ses->enc_schedule, ses->rounds))
+				error = EBADMSG;
 		}
+		break;
 	}
 
+	fpu_kern_leave(td, ses->fpu_ctx);
+	critical_exit();
+
 	if (allocated)
 		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
 		    enccrd->crd_len, buf);
 
-	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
-		crypto_copydata(crp->crp_flags, crp->crp_buf,
-		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
-		    AES_BLOCK_LEN, ses->iv);
-out:
-	fpu_kern_leave(td, ses->fpu_ctx);
+	if (!error && authcrd != NULL) {
+		crypto_copyback(crp->crp_flags, crp->crp_buf,
+		    authcrd->crd_inject, crp->crp_ilen - authcrd->crd_inject,
+		    tag);
+	}
+
 out1:
 	if (allocated) {
 		bzero(buf, enccrd->crd_len);
 		free(buf, M_AESNI);
 	}
+	if (authallocated)
+		free(authbuf, M_AESNI);
+
 	return (error);
 }
diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h
index ff1d1a2..fbbbefb 100644
--- a/sys/crypto/aesni/aesni.h
+++ b/sys/crypto/aesni/aesni.h
@@ -56,7 +56,9 @@ struct aesni_session {
 	uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16);
 	uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16);
 	uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16);
-	uint8_t iv[AES_BLOCK_LEN];
+	/* AES-GCM needs a counter hence the separated enc/dec IV */
+	uint8_t nonce[4];
+	volatile uint64_t aesgcmcounter;
 	int algo;
 	int rounds;
 	/* uint8_t *ses_ictx; */
@@ -64,7 +66,6 @@ struct aesni_session {
 	/* int ses_mlen; */
 	int used;
 	uint32_t id;
-	TAILQ_ENTRY(aesni_session) next;
 	struct fpu_kern_ctx *fpu_ctx;
 };
 
@@ -96,6 +97,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
     const void *tweak_schedule /*__aligned(16)*/, size_t len,
     const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
 
+/* GCM & GHASH functions */
+void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+    const unsigned char *addt, const unsigned char *ivec,
+    unsigned char *tag, int nbytes, int abytes, int ibytes,
+    const unsigned char *key, int nr);
+int AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+    const unsigned char *addt, const unsigned char *ivec,
+    unsigned char *tag, int nbytes, int abytes, int ibytes,
+    const unsigned char *key, int nr);
+
 int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
     int keylen);
 uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c
new file mode 100644
index 0000000..16e6f53
--- /dev/null
+++ b/sys/crypto/aesni/aesni_ghash.c
@@ -0,0 +1,523 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship from the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ *
+ */
+
+/*
+ * Figure 5, 7, and 11 are copied from the Intel white paper:
+ * Intel® Carry-Less Multiplication Instruction and its Usage for Computing
+ * the GCM Mode
+ *
+ * and as such are: Copyright © 2010 Intel Corporation. All rights reserved.
+ *
+ * Please see white paper for complete license.
+ */
+
+#ifdef _KERNEL
+#include <crypto/aesni/aesni.h>
+#else
+#include <stdint.h>
+#endif
+
+#include <wmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */
+
+#define	REFLECT(X) \
+	hlp1 = _mm_srli_epi16(X, 4);\
+	X = _mm_and_si128(AMASK, X);\
+	hlp1 = _mm_and_si128(AMASK, hlp1);\
+	X = _mm_shuffle_epi8(MASKH, X);\
+	hlp1 = _mm_shuffle_epi8(MASKL, hlp1);\
+	X = _mm_xor_si128(X, hlp1)
+
+static inline int
+m128icmp(__m128i a, __m128i b)
+{
+	__m128i cmp;
+
+	cmp = _mm_cmpeq_epi32(a, b);
+
+	return _mm_movemask_epi8(cmp) == 0xffff;
+}
+
+/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */
+static void
+gfmul_decrypt(__m128i a, __m128i b, __m128i *res)
+{
+	__m128i /* tmp0, tmp1, tmp2, */ tmp3, tmp4, tmp5, tmp6,
+	    tmp7, tmp8, tmp9, tmp10, tmp11, tmp12;
+	__m128i XMMMASK = _mm_setr_epi32(0xffffffff, 0x0, 0x0, 0x0);
+
+	tmp3 = _mm_clmulepi64_si128(a, b, 0x00);
+	tmp6 = _mm_clmulepi64_si128(a, b, 0x11);
+	tmp4 = _mm_shuffle_epi32(a, 78);
+	tmp5 = _mm_shuffle_epi32(b, 78);
+	tmp4 = _mm_xor_si128(tmp4, a);
+	tmp5 = _mm_xor_si128(tmp5, b);
+	tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00);
+	tmp4 = _mm_xor_si128(tmp4, tmp3);
+	tmp4 = _mm_xor_si128(tmp4, tmp6);
+	tmp5 = _mm_slli_si128(tmp4, 8);
+	tmp4 = _mm_srli_si128(tmp4, 8);
+	tmp3 = _mm_xor_si128(tmp3, tmp5);
+	tmp6 = _mm_xor_si128(tmp6, tmp4);
+	tmp7 = _mm_srli_epi32(tmp6, 31);
+	tmp8 = _mm_srli_epi32(tmp6, 30);
+	tmp9 = _mm_srli_epi32(tmp6, 25);
+	tmp7 = _mm_xor_si128(tmp7, tmp8);
+	tmp7 = _mm_xor_si128(tmp7, tmp9);
+	tmp8 = _mm_shuffle_epi32(tmp7, 147);
+
+	tmp7 = _mm_and_si128(XMMMASK, tmp8);
+	tmp8 = _mm_andnot_si128(XMMMASK, tmp8);
+	tmp3 = _mm_xor_si128(tmp3, tmp8);
+	tmp6 = _mm_xor_si128(tmp6, tmp7);
+	tmp10 = _mm_slli_epi32(tmp6, 1);
+	tmp3 = _mm_xor_si128(tmp3, tmp10);
+	tmp11 = _mm_slli_epi32(tmp6, 2);
+	tmp3 = _mm_xor_si128(tmp3, tmp11);
+	tmp12 = _mm_slli_epi32(tmp6, 7);
+	tmp3 = _mm_xor_si128(tmp3, tmp12);
+
+	*res = _mm_xor_si128(tmp3, tmp6);
+}
+
+void
+AES_GCM_encrypt(const unsigned char *in,
+    unsigned char *out,
+    const unsigned char *addt,
+    const unsigned char *ivec,
+    unsigned char *tag,
+    int nbytes,
+    int abytes,
+    int ibytes,
+    const unsigned char *key,
+    int nr)
+{
+	int i, j, k;
+	__m128i hlp1 /* , hlp2, hlp3, hlp4 */;
+	__m128i tmp1, tmp2, tmp3, tmp4;
+	__m128i H, T;
+	__m128i *KEY = (__m128i *) key;
+	__m128i ctr1, ctr2, ctr3, ctr4;
+	__m128i last_block = _mm_setzero_si128();
+	__m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+	__m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
+	__m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15,
+	    0, 1, 2, 3, 4, 5, 6, 7);
+	/*
+	 * __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+	 * 10, 11, 12, 13, 14, 15);
+	 */
+	__m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
+	__m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
+	__m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
+	__m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
+	__m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+	if (ibytes == 96 / 8) {
+		Y = _mm_loadu_si128((__m128i *) ivec);
+		Y = _mm_insert_epi32(Y, 0x1000000, 3);
+		/* (Compute E[ZERO, KS] and E[Y0, KS] together */
+		tmp1 = _mm_xor_si128(X, KEY[0]);
+		tmp2 = _mm_xor_si128(Y, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+		REFLECT(H);
+	} else {
+		tmp1 = _mm_xor_si128(X, KEY[0]);
+		for (j = 1; j < nr; j++)
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		REFLECT(H);
+		Y = _mm_xor_si128(Y, Y);
+		for (i = 0; i < ibytes / 16; i++) {
+			tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
+			REFLECT(tmp1);
+			Y = _mm_xor_si128(Y, tmp1);
+			gfmul_decrypt(Y, H, &Y);
+		}
+		if (ibytes % 16) {
+			for (j = 0; j < ibytes % 16; j++)
+				((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
+			tmp1 = last_block;
+			REFLECT(tmp1);
+			Y = _mm_xor_si128(Y, tmp1);
+			gfmul_decrypt(Y, H, &Y);
+		}
+		tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
+		tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+		REFLECT(tmp1);
+		tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+		Y = _mm_xor_si128(Y, tmp1);
+		gfmul_decrypt(Y, H, &Y);
+		REFLECT(Y);
+		/* Compute E(K, Y0) */
+		tmp1 = _mm_xor_si128(Y, KEY[0]);
+		for (j = 1; j < nr; j++)
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+		T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+	}
+
+	for (i = 0; i < abytes / 16; i++) {
+		tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	if (abytes % 16) {
+		last_block = _mm_setzero_si128();
+		for (j = 0; j < abytes % 16; j++)
+			((unsigned char *)&last_block)[j] = addt[i * 16 + j];
+		tmp1 = last_block;
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+	ctr1 = _mm_add_epi64(ctr1, ONE);
+	ctr2 = _mm_add_epi64(ctr1, ONE);
+	ctr3 = _mm_add_epi64(ctr2, ONE);
+	ctr4 = _mm_add_epi64(ctr3, ONE);
+	for (i = 0; i < nbytes / 16 / 4; i++) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+		ctr1 = _mm_add_epi64(ctr1, FOUR);
+		ctr2 = _mm_add_epi64(ctr2, FOUR);
+		ctr3 = _mm_add_epi64(ctr3, FOUR);
+		ctr4 = _mm_add_epi64(ctr4, FOUR);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		tmp2 = _mm_xor_si128(tmp2, KEY[0]);
+		tmp3 = _mm_xor_si128(tmp3, KEY[0]);
+		tmp4 = _mm_xor_si128(tmp4, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+			tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+			tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+			tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
+			tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+		tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
+		tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
+		tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
+		tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
+		tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
+		tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
+		tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
+		REFLECT(tmp1);
+		REFLECT(tmp2);
+		REFLECT(tmp3);
+		REFLECT(tmp4);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+		X = _mm_xor_si128(X, tmp2);
+		gfmul_decrypt(X, H, &X);
+		X = _mm_xor_si128(X, tmp3);
+		gfmul_decrypt(X, H, &X);
+		X = _mm_xor_si128(X, tmp4);
+		gfmul_decrypt(X, H, &X);
+	}
+	for (k = i * 4; k < nbytes / 16; k++) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		ctr1 = _mm_add_epi64(ctr1, ONE);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+		_mm_storeu_si128(&((__m128i *) out)[k], tmp1);
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	//If one partial block remains
+	if (nbytes % 16) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+		last_block = tmp1;
+		for (j = 0; j < nbytes % 16; j++)
+			out[k * 16 + j] = ((unsigned char *)&last_block)[j];
+		for (; j < 16; j++)
+			((unsigned char *)&last_block)[j] = 0;
+		tmp1 = last_block;
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
+	tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
+	REFLECT(tmp1);
+	tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+	X = _mm_xor_si128(X, tmp1);
+	gfmul_decrypt(X, H, &X);
+	REFLECT(X);
+	T = _mm_xor_si128(X, T);
+	_mm_storeu_si128((__m128i *) tag, T);
+}
+
+int
+AES_GCM_decrypt(const unsigned char *in,
+    unsigned char *out,
+    const unsigned char *addt,
+    const unsigned char *ivec,
+    unsigned char *tag,
+    int nbytes,
+    int abytes,
+    int ibytes,
+    const unsigned char *key,
+    int nr)
+{
+	int i, j, k;
+	__m128i hlp1 /* , hlp2, hlp3, hlp4 */;
+	__m128i tmp1, tmp2, tmp3, tmp4;
+	__m128i H, T;
+	__m128i *KEY = (__m128i *) key;
+	__m128i ctr1, ctr2, ctr3, ctr4;
+	__m128i last_block = _mm_setzero_si128();
+	__m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+	__m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
+	__m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15,
+	    0, 1, 2, 3, 4, 5, 6, 7);
+	__m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+	    10, 11, 12, 13, 14, 15);
+	__m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
+	__m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
+	__m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
+	__m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
+	__m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+	if (ibytes == 96 / 8) {
+		Y = _mm_loadu_si128((__m128i *) ivec);
+		Y = _mm_insert_epi32(Y, 0x1000000, 3);
+		/* (Compute E[ZERO, KS] and E[Y0, KS] together */
+		tmp1 = _mm_xor_si128(X, KEY[0]);
+		tmp2 = _mm_xor_si128(Y, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+		};
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+		REFLECT(H);
+	} else {
+		tmp1 = _mm_xor_si128(X, KEY[0]);
+		for (j = 1; j < nr; j++)
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+		H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		REFLECT(H);
+		Y = _mm_xor_si128(Y, Y);
+		for (i = 0; i < ibytes / 16; i++) {
+			tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
+			REFLECT(tmp1);
+			Y = _mm_xor_si128(Y, tmp1);
+			gfmul_decrypt(Y, H, &Y);
+		}
+		if (ibytes % 16) {
+			for (j = 0; j < ibytes % 16; j++)
+				((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
+			tmp1 = last_block;
+			REFLECT(tmp1);
+			Y = _mm_xor_si128(Y, tmp1);
+			gfmul_decrypt(Y, H, &Y);
+		}
+		tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
+		tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+		REFLECT(tmp1);
+		tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+		Y = _mm_xor_si128(Y, tmp1);
+		gfmul_decrypt(Y, H, &Y);
+		REFLECT(Y);
+		/* Compute E(K, Y0) */
+		tmp1 = _mm_xor_si128(Y, KEY[0]);
+		for (j = 1; j < nr; j++)
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+		T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+	}
+	for (i = 0; i < abytes / 16; i++) {
+		tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	if (abytes % 16) {
+		last_block = _mm_setzero_si128();
+		for (j = 0; j < abytes % 16; j++)
+			((unsigned char *)&last_block)[j] = addt[i * 16 + j];
+		tmp1 = last_block;
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	for (i = 0; i < nbytes / 16; i++) {
+		tmp1 = _mm_loadu_si128(&((__m128i *) in)[i]);
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	if (nbytes % 16) {
+		last_block = _mm_setzero_si128();
+		for (j = 0; j < nbytes % 16; j++)
+			((unsigned char *)&last_block)[j] = in[i * 16 + j];
+		tmp1 = last_block;
+		REFLECT(tmp1);
+		X = _mm_xor_si128(X, tmp1);
+		gfmul_decrypt(X, H, &X);
+	}
+	tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
+	tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
+	REFLECT(tmp1);
+	tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+	X = _mm_xor_si128(X, tmp1);
+	gfmul_decrypt(X, H, &X);
+	REFLECT(X);
+	T = _mm_xor_si128(X, T);
+
+	if (!m128icmp(T, _mm_loadu_si128((__m128i *) tag)))
+		return 0;	//in case the authentication failed
+
+	ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+	ctr1 = _mm_add_epi64(ctr1, ONE);
+	ctr2 = _mm_add_epi64(ctr1, ONE);
+	ctr3 = _mm_add_epi64(ctr2, ONE);
+	ctr4 = _mm_add_epi64(ctr3, ONE);
+	for (i = 0; i < nbytes / 16 / 4; i++) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+		ctr1 = _mm_add_epi64(ctr1, FOUR);
+		ctr2 = _mm_add_epi64(ctr2, FOUR);
+		ctr3 = _mm_add_epi64(ctr3, FOUR);
+		ctr4 = _mm_add_epi64(ctr4, FOUR);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		tmp2 = _mm_xor_si128(tmp2, KEY[0]);
+		tmp3 = _mm_xor_si128(tmp3, KEY[0]);
+		tmp4 = _mm_xor_si128(tmp4, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+			tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+			tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+			tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+			tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
+			tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+		tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
+		tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
+		tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
+		tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
+		tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
+		tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
+		tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
+		_mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
+		tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+		tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+		tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+		tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+	}
+	for (k = i * 4; k < nbytes / 16; k++) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		ctr1 = _mm_add_epi64(ctr1, ONE);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+		_mm_storeu_si128(&((__m128i *) out)[k], tmp1);
+	}
+	//If one partial block remains
+	if (nbytes % 16) {
+		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+		tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+		for (j = 1; j < nr - 1; j += 2) {
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+			tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+		}
+		tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+		tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+		tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+		last_block = tmp1;
+		for (j = 0; j < nbytes % 16; j++)
+			out[k * 16 + j] = ((unsigned char *)&last_block)[j];
+	}
+	return 1;	//when sucessfull returns 1
+}
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 39819a6..83d79fc 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -176,10 +176,6 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
 	}
 }
 
-#define	AES_XTS_BLOCKSIZE	16
-#define	AES_XTS_IVSIZE		8
-#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
-
 static inline __m128i
 xts_crank_lfsr(__m128i inp)
 {
@@ -347,6 +343,23 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 			ses->rounds = AES256_ROUNDS;
 			break;
 		default:
+			printf("invalid CBC/GCM key length");
+			return (EINVAL);
+		}
+		break;
+	case CRYPTO_AES_RFC4106_GCM_16:
+		switch (keylen) {
+		case 160:
+			ses->rounds = AES128_ROUNDS;
+			break;
+		case 224:
+			ses->rounds = AES192_ROUNDS;
+			break;
+		case 288:
+			ses->rounds = AES256_ROUNDS;
+			break;
+		default:
+			printf("invalid CBC/GCM key length");
 			return (EINVAL);
 		}
 		break;
@@ -359,6 +372,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 			ses->rounds = AES256_ROUNDS;
 			break;
 		default:
+			printf("invalid XTS key length");
 			return (EINVAL);
 		}
 		break;
@@ -368,11 +382,20 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
 
 	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
 	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
-	if (ses->algo == CRYPTO_AES_CBC)
-		arc4rand(ses->iv, sizeof(ses->iv), 0);
-	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+
+	/* setup IV */
+	switch (ses->algo) {
+	case CRYPTO_AES_CBC:
+		/* Nothing todo */
+		break;
+	case CRYPTO_AES_RFC4106_GCM_16:
+		bcopy(key + ((keylen - 32) / 8), ses->nonce, AESCTR_NONCESIZE);
+		arc4rand((void *)&ses->aesgcmcounter, sizeof(uint64_t), 0);
+		break;
+	case CRYPTO_AES_XTS:
 		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
 		    ses->rounds);
+		break;
 	}
 
 	return (0);
diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile
index 26dbedc..e66f941 100644
--- a/sys/modules/aesni/Makefile
+++ b/sys/modules/aesni/Makefile
@@ -15,5 +15,13 @@ aesni_wrap.o: aesni_wrap.c
 	    -mmmx -msse -maes ${.IMPSRC}
 	${CTFCONVERT_CMD}
 
+.if ${MACHINE_CPUARCH} == "amd64"
+OBJS+=	aesni_ghash.o
+aesni_ghash.o: aesni_ghash.c
+	${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \
+	    -mmmx -mpclmul -msse -msse4.1 -maes ${.IMPSRC}
+	${CTFCONVERT_CMD}
+.endif
+
 .include <bsd.kmod.mk>