summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRenato Botelho <renato@netgate.com>2015-08-17 13:53:22 -0300
committerRenato Botelho <renato@netgate.com>2015-08-17 13:53:22 -0300
commit86163f54d3288d43997b0766d4c2538ed7f70b17 (patch)
treef3aed79ce63c2fb0eea1b2595b71b007d200ed43
parent46e99a8858f1c843c1774e472c11d422ca2163ae (diff)
downloadFreeBSD-src-86163f54d3288d43997b0766d4c2538ed7f70b17.zip
FreeBSD-src-86163f54d3288d43997b0766d4c2538ed7f70b17.tar.gz
Importing pfSense patch aesgcm.hwaccl.diff
-rw-r--r--sys/crypto/aesni/aesni.c401
-rw-r--r--sys/crypto/aesni/aesni.h15
-rw-r--r--sys/crypto/aesni/aesni_ghash.c523
-rw-r--r--sys/crypto/aesni/aesni_wrap.c37
-rw-r--r--sys/modules/aesni/Makefile8
5 files changed, 862 insertions, 122 deletions
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
index 7d7a740..e1bd5e8 100644
--- a/sys/crypto/aesni/aesni.c
+++ b/sys/crypto/aesni/aesni.c
@@ -39,14 +39,15 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/bus.h>
#include <sys/uio.h>
+#include <sys/mbuf.h>
#include <crypto/aesni/aesni.h>
#include <cryptodev_if.h>
+#include <opencrypto/gmac.h>
struct aesni_softc {
int32_t cid;
- uint32_t sid;
- TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions;
- struct rwlock lock;
+ volatile uint32_t nsessions;
+ struct aesni_session *sessions;
};
static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri);
@@ -56,7 +57,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc,
static int aesni_cipher_setup(struct aesni_session *ses,
struct cryptoini *encini);
static int aesni_cipher_process(struct aesni_session *ses,
- struct cryptodesc *enccrd, struct cryptop *crp);
+ struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp);
MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data");
@@ -79,12 +80,12 @@ aesni_probe(device_t dev)
return (EINVAL);
}
- if ((cpu_feature & CPUID_SSE2) == 0) {
- device_printf(dev, "No SSE2 support but AESNI!?!\n");
+ if ((cpu_feature & CPUID2_SSE41) == 0 && (cpu_feature2 & CPUID2_SSE41) == 0) {
+ device_printf(dev, "No SSE4.1 support.\n");
return (EINVAL);
}
- device_set_desc_copy(dev, "AES-CBC,AES-XTS");
+ device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM");
return (0);
}
@@ -94,8 +95,6 @@ aesni_attach(device_t dev)
struct aesni_softc *sc;
sc = device_get_softc(dev);
- TAILQ_INIT(&sc->sessions);
- sc->sid = 1;
sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE |
CRYPTOCAP_F_SYNC);
if (sc->cid < 0) {
@@ -103,9 +102,16 @@ aesni_attach(device_t dev)
return (ENOMEM);
}
- rw_init(&sc->lock, "aesni_lock");
+ sc->nsessions = 32;
+ sc->sessions = malloc(sc->nsessions * sizeof(struct aesni_session),
+ M_AESNI, M_WAITOK | M_ZERO);
+
crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0);
crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_RFC4106_GCM_16, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_128_GMAC, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_192_GMAC, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_256_GMAC, 0, 0);
return (0);
}
@@ -114,25 +120,24 @@ aesni_detach(device_t dev)
{
struct aesni_softc *sc;
struct aesni_session *ses;
+ int i;
sc = device_get_softc(dev);
- rw_wlock(&sc->lock);
- TAILQ_FOREACH(ses, &sc->sessions, next) {
+ for (i = 0; i < sc->nsessions; i++) {
+ ses = &sc->sessions[i];
if (ses->used) {
- rw_wunlock(&sc->lock);
device_printf(dev,
"Cannot detach, sessions still active.\n");
return (EBUSY);
}
}
- while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) {
- TAILQ_REMOVE(&sc->sessions, ses, next);
- fpu_kern_free_ctx(ses->fpu_ctx);
- free(ses, M_AESNI);
- }
- rw_wunlock(&sc->lock);
- rw_destroy(&sc->lock);
crypto_unregister_all(sc->cid);
+ for (i = 0; i < sc->nsessions; i++) {
+ ses = &sc->sessions[i];
+ if (ses->fpu_ctx != NULL)
+ fpu_kern_free_ctx(ses->fpu_ctx);
+ }
+ free(sc->sessions, M_AESNI);
return (0);
}
@@ -142,10 +147,12 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
struct aesni_softc *sc;
struct aesni_session *ses;
struct cryptoini *encini;
- int error;
+ int error, sessn;
- if (sidp == NULL || cri == NULL)
+ if (sidp == NULL || cri == NULL) {
+ printf("no sidp or cri");
return (EINVAL);
+ }
sc = device_get_softc(dev);
ses = NULL;
@@ -153,55 +160,76 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
for (; cri != NULL; cri = cri->cri_next) {
switch (cri->cri_alg) {
case CRYPTO_AES_CBC:
+ if (encini != NULL) {
+ printf("encini already set");
+ return (EINVAL);
+ }
+ encini = cri;
+ break;
case CRYPTO_AES_XTS:
- if (encini != NULL)
+ case CRYPTO_AES_RFC4106_GCM_16:
+ if (encini != NULL) {
+ printf("encini already set");
return (EINVAL);
- encini = cri;
+ }
+ encini = cri;
+ break;
+ case CRYPTO_AES_128_GMAC:
+ case CRYPTO_AES_192_GMAC:
+ case CRYPTO_AES_256_GMAC:
+ /*
+ * nothing to do here, maybe in the future cache some
+ * values for GHASH
+ */
break;
default:
+ printf("unhandled algorithm");
return (EINVAL);
}
}
- if (encini == NULL)
+ if (encini == NULL) {
+ printf("no cipher");
return (EINVAL);
+ }
- rw_wlock(&sc->lock);
- /*
- * Free sessions goes first, so if first session is used, we need to
- * allocate one.
- */
- ses = TAILQ_FIRST(&sc->sessions);
- if (ses == NULL || ses->used) {
- ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO);
+ for (sessn = 1; sessn < sc->nsessions; sessn++) {
+ if (!sc->sessions[sessn].used) {
+ ses = &sc->sessions[sessn];
+ break;
+ }
+ }
+ if (ses == NULL) {
+ ses = malloc(sizeof(*ses) * sc->nsessions * 2, M_AESNI, M_NOWAIT | M_ZERO);
if (ses == NULL) {
- rw_wunlock(&sc->lock);
+ sc->sessions = ses;
return (ENOMEM);
}
+ bcopy((void *)sc->sessions, (void *)ses, sc->nsessions * sizeof(*ses));
+ atomic_set_ptr((u_long *)sc->sessions, (u_long)ses);
+ bzero((void *)ses, sc->nsessions * sizeof(*ses));
+ ses = &sc->sessions[sc->nsessions];
+ ses->id = sc->nsessions;
+ atomic_add_int(&sc->nsessions, 1);
+ } else if (ses->id == 0)
+ ses->id = sessn;
+
+ if (ses->fpu_ctx == NULL) {
ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL |
FPU_KERN_NOWAIT);
- if (ses->fpu_ctx == NULL) {
- free(ses, M_AESNI);
- rw_wunlock(&sc->lock);
+ if (ses->fpu_ctx == NULL)
return (ENOMEM);
- }
- ses->id = sc->sid++;
- } else {
- TAILQ_REMOVE(&sc->sessions, ses, next);
}
- ses->used = 1;
- TAILQ_INSERT_TAIL(&sc->sessions, ses, next);
- rw_wunlock(&sc->lock);
ses->algo = encini->cri_alg;
error = aesni_cipher_setup(ses, encini);
if (error != 0) {
- rw_wlock(&sc->lock);
+ printf("setup failed");
aesni_freesession_locked(sc, ses);
- rw_wunlock(&sc->lock);
return (error);
}
-
+ ses->used = 1;
*sidp = ses->id;
+
return (0);
}
@@ -212,12 +240,10 @@ aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses)
uint32_t sid;
sid = ses->id;
- TAILQ_REMOVE(&sc->sessions, ses, next);
ctx = ses->fpu_ctx;
bzero(ses, sizeof(*ses));
ses->id = sid;
ses->fpu_ctx = ctx;
- TAILQ_INSERT_HEAD(&sc->sessions, ses, next);
}
static int
@@ -229,17 +255,14 @@ aesni_freesession(device_t dev, uint64_t tid)
sc = device_get_softc(dev);
sid = ((uint32_t)tid) & 0xffffffff;
- rw_wlock(&sc->lock);
- TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
- if (ses->id == sid)
- break;
- }
- if (ses == NULL) {
- rw_wunlock(&sc->lock);
+ if (sid >= sc->nsessions)
return (EINVAL);
- }
+
+ ses = &sc->sessions[sid];
+ if (ses == NULL)
+ return (EINVAL);
+
aesni_freesession_locked(sc, ses);
- rw_wunlock(&sc->lock);
return (0);
}
@@ -248,20 +271,25 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
{
struct aesni_softc *sc = device_get_softc(dev);
struct aesni_session *ses = NULL;
- struct cryptodesc *crd, *enccrd;
- int error;
+ struct cryptodesc *crd, *enccrd, *authcrd;
+ uint32_t sid;
+ int error, needauth;
error = 0;
enccrd = NULL;
+ authcrd = NULL;
+ needauth = 0;
/* Sanity check. */
if (crp == NULL)
return (EINVAL);
- if (crp->crp_callback == NULL || crp->crp_desc == NULL) {
- error = EINVAL;
- goto out;
- }
+ if (crp->crp_callback == NULL || crp->crp_desc == NULL)
+ return (EINVAL);
+
+ sid = ((uint32_t)crp->crp_sid) & 0xffffffff;
+ if (sid >= sc->nsessions)
+ return (EINVAL);
for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
switch (crd->crd_alg) {
@@ -273,27 +301,51 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
}
enccrd = crd;
break;
+
+ case CRYPTO_AES_RFC4106_GCM_16:
+ if (enccrd != NULL) {
+ error = EINVAL;
+ goto out;
+ }
+ enccrd = crd;
+ needauth = 1;
+ break;
+
+ case CRYPTO_AES_128_GMAC:
+ case CRYPTO_AES_192_GMAC:
+ case CRYPTO_AES_256_GMAC:
+ if (authcrd != NULL) {
+ error = EINVAL;
+ goto out;
+ }
+ authcrd = crd;
+ needauth = 1;
+ break;
+
default:
return (EINVAL);
}
}
- if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+
+ if (enccrd == NULL || (needauth && authcrd == NULL)) {
error = EINVAL;
goto out;
}
- rw_rlock(&sc->lock);
- TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
- if (ses->id == (crp->crp_sid & 0xffffffff))
- break;
+ /* CBC & XTS can only handle full blocks for now */
+ if ((enccrd->crd_len == CRYPTO_AES_CBC || enccrd->crd_len ==
+ CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+ error = EINVAL;
+ goto out;
}
- rw_runlock(&sc->lock);
+
+ ses = &sc->sessions[sid];
if (ses == NULL) {
error = EINVAL;
goto out;
}
- error = aesni_cipher_process(ses, enccrd, crp);
+ error = aesni_cipher_process(ses, enccrd, authcrd, crp);
if (error != 0)
goto out;
@@ -307,13 +359,17 @@ uint8_t *
aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
int *allocated)
{
+ struct mbuf *m;
struct uio *uio;
struct iovec *iov;
uint8_t *addr;
- if (crp->crp_flags & CRYPTO_F_IMBUF)
- goto alloc;
- else if (crp->crp_flags & CRYPTO_F_IOV) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF) {
+ m = (struct mbuf *)crp->crp_buf;
+ if (m->m_next != NULL)
+ goto alloc;
+ addr = mtod(m, uint8_t *);
+ } else if (crp->crp_flags & CRYPTO_F_IOV) {
uio = (struct uio *)crp->crp_buf;
if (uio->uio_iovcnt != 1)
goto alloc;
@@ -322,6 +378,7 @@ aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
} else
addr = (u_char *)crp->crp_buf;
*allocated = 0;
+ addr += enccrd->crd_skip;
return (addr);
alloc:
@@ -366,83 +423,201 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
int error;
td = curthread;
+ critical_enter();
error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
FPU_KERN_KTHR);
- if (error != 0)
+ if (error != 0) {
+ critical_exit();
return (error);
+ }
error = aesni_cipher_setup_common(ses, encini->cri_key,
encini->cri_klen);
fpu_kern_leave(td, ses->fpu_ctx);
+ critical_exit();
return (error);
}
+#ifdef AESNI_DEBUG /* debug-only: dump len bytes starting at ptr as lowercase hex via printf() */
+static void
+aesni_printhexstr(uint8_t *ptr, int len)
+{
+ int i;
+/* Two hex digits per byte; no separators and no trailing newline are emitted. */
+ for (i = 0; i < len; i++)
+ printf("%02hhx", ptr[i]);
+}
+#endif /* AESNI_DEBUG */
+
static int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
- struct cryptop *crp)
+ struct cryptodesc *authcrd, struct cryptop *crp)
{
+ uint8_t *tag;
+ uint8_t *iv;
struct thread *td;
- uint8_t *buf;
- int error, allocated;
+ uint8_t *buf, *authbuf;
+ int error, allocated, authallocated;
+ int ivlen, encflag, i;
+
+ encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT;
buf = aesni_cipher_alloc(enccrd, crp, &allocated);
if (buf == NULL)
return (ENOMEM);
- td = curthread;
- error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
- FPU_KERN_KTHR);
- if (error != 0)
- goto out1;
+ authbuf = NULL;
+ authallocated = 0;
+ if (authcrd != NULL) {
+ authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated);
+ if (authbuf == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ /* NOTE: GMAC_DIGEST_LEN == AES_BLOCK_LEN */
+ tag = authcrd->crd_iv;
+ }
- if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
- error = aesni_cipher_setup_common(ses, enccrd->crd_key,
- enccrd->crd_klen);
- if (error != 0)
- goto out;
+ iv = enccrd->crd_iv;
+ /* XXX - validate that enccrd and authcrd have/use same key? */
+ switch (enccrd->crd_alg) {
+ case CRYPTO_AES_CBC:
+ ivlen = 16;
+ break;
+ case CRYPTO_AES_XTS:
+ ivlen = 8;
+ break;
+ case CRYPTO_AES_RFC4106_GCM_16:
+ /* Be smart at determining the ivlen until better ways are present */
+ ivlen = enccrd->crd_skip - enccrd->crd_inject;
+ ivlen += 4;
+ break;
}
- if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
+ /* Setup ses->iv */
+ if (encflag) {
if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
- bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
- if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
- crypto_copyback(crp->crp_flags, crp->crp_buf,
- enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
- if (ses->algo == CRYPTO_AES_CBC) {
- aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
- enccrd->crd_len, buf, buf, ses->iv);
- } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
- aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
- ses->xts_schedule, enccrd->crd_len, buf, buf,
- ses->iv);
+ bcopy(enccrd->crd_iv, iv, ivlen);
+ else if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
+ if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
+ for (i = 0; i < AESCTR_NONCESIZE; i++)
+ iv[i] = ses->nonce[i];
+ /* XXX: Is this enough? */
+ u_long counter = atomic_fetchadd_long(&ses->aesgcmcounter, 1);
+ bcopy((void *)&counter, iv + AESCTR_NONCESIZE, sizeof(uint64_t));
+ crypto_copyback(crp->crp_flags, crp->crp_buf,
+ enccrd->crd_inject, AESCTR_IVSIZE, iv + AESCTR_NONCESIZE);
+ } else {
+ arc4rand(iv, AES_BLOCK_LEN, 0);
+ crypto_copyback(crp->crp_flags, crp->crp_buf,
+ enccrd->crd_inject, ivlen, iv);
+ }
}
} else {
if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
- bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+ bcopy(enccrd->crd_iv, iv, ivlen);
+ else {
+ if (enccrd->crd_alg == CRYPTO_AES_RFC4106_GCM_16) {
+ for (i = 0; i < AESCTR_NONCESIZE; i++)
+ iv[i] = ses->nonce[i];
+ crypto_copydata(crp->crp_flags, crp->crp_buf,
+ enccrd->crd_inject, AESCTR_IVSIZE, iv + AESCTR_NONCESIZE);
+ } else
+ crypto_copydata(crp->crp_flags, crp->crp_buf,
+ enccrd->crd_inject, ivlen, iv);
+ }
+ }
+#ifdef AESNI_DEBUG
+ aesni_printhexstr(iv, ivlen);
+ printf("\n");
+#endif
+
+ if (authcrd != NULL && !encflag) {
+ crypto_copydata(crp->crp_flags, crp->crp_buf,
+ authcrd->crd_inject, GMAC_DIGEST_LEN, tag);
+ } else {
+#ifdef AESNI_DEBUG
+ printf("ptag: ");
+ aesni_printhexstr(tag, sizeof tag);
+ printf("\n");
+#endif
+ bzero(tag, sizeof tag);
+ }
+
+ td = curthread;
+
+ critical_enter();
+ error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
+ FPU_KERN_KTHR);
+ if (error != 0) {
+ critical_exit();
+ goto out1;
+ }
+ /* Do work */
+ switch (ses->algo) {
+ case CRYPTO_AES_CBC:
+ if (encflag)
+ aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
+ enccrd->crd_len, buf, buf, iv);
else
- crypto_copydata(crp->crp_flags, crp->crp_buf,
- enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
- if (ses->algo == CRYPTO_AES_CBC) {
aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
- enccrd->crd_len, buf, ses->iv);
- } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+ enccrd->crd_len, buf, iv);
+ break;
+ case CRYPTO_AES_XTS:
+ if (encflag)
+ aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
+ ses->xts_schedule, enccrd->crd_len, buf, buf,
+ iv);
+ else
aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
ses->xts_schedule, enccrd->crd_len, buf, buf,
- ses->iv);
+ iv);
+ break;
+ case CRYPTO_AES_RFC4106_GCM_16:
+#ifdef AESNI_DEBUG
+ printf("GCM: %d\n", encflag);
+ printf("buf(%d): ", enccrd->crd_len);
+ aesni_printhexstr(buf, enccrd->crd_len);
+ printf("\nauthbuf(%d): ", authcrd->crd_len);
+ aesni_printhexstr(authbuf, authcrd->crd_len);
+ printf("\niv: ");
+ aesni_printhexstr(iv, ivlen);
+ printf("\ntag: ");
+ aesni_printhexstr(tag, 16);
+ printf("\nsched: ");
+ aesni_printhexstr(ses->enc_schedule, 16 * (ses->rounds + 1));
+ printf("\n");
+#endif
+ if (encflag)
+ AES_GCM_encrypt(buf, buf, authbuf, iv, tag,
+ enccrd->crd_len, authcrd->crd_len, ivlen,
+ ses->enc_schedule, ses->rounds);
+ else {
+ if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag,
+ enccrd->crd_len, authcrd->crd_len, ivlen,
+ ses->enc_schedule, ses->rounds))
+ error = EBADMSG;
}
+ break;
}
+ fpu_kern_leave(td, ses->fpu_ctx);
+ critical_exit();
+
if (allocated)
crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
enccrd->crd_len, buf);
- if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
- crypto_copydata(crp->crp_flags, crp->crp_buf,
- enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
- AES_BLOCK_LEN, ses->iv);
-out:
- fpu_kern_leave(td, ses->fpu_ctx);
+
+ if (!error && authcrd != NULL) {
+ crypto_copyback(crp->crp_flags, crp->crp_buf,
+ authcrd->crd_inject, crp->crp_ilen - authcrd->crd_inject, tag);
+ }
+
out1:
if (allocated) {
bzero(buf, enccrd->crd_len);
free(buf, M_AESNI);
}
+ if (authallocated)
+ free(authbuf, M_AESNI);
+
return (error);
}
diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h
index ff1d1a2..fbbbefb 100644
--- a/sys/crypto/aesni/aesni.h
+++ b/sys/crypto/aesni/aesni.h
@@ -56,7 +56,9 @@ struct aesni_session {
uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16);
uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16);
uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16);
- uint8_t iv[AES_BLOCK_LEN];
+ /* AES-GCM needs a counter hence the separated enc/dec IV */
+ uint8_t nonce[4];
+ volatile uint64_t aesgcmcounter;
int algo;
int rounds;
/* uint8_t *ses_ictx; */
@@ -64,7 +66,6 @@ struct aesni_session {
/* int ses_mlen; */
int used;
uint32_t id;
- TAILQ_ENTRY(aesni_session) next;
struct fpu_kern_ctx *fpu_ctx;
};
@@ -96,6 +97,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
const void *tweak_schedule /*__aligned(16)*/, size_t len,
const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
+/* GCM & GHASH functions */
+void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ unsigned char *tag, int nbytes, int abytes, int ibytes,
+ const unsigned char *key, int nr);
+int AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ unsigned char *tag, int nbytes, int abytes, int ibytes,
+ const unsigned char *key, int nr);
+
int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
int keylen);
uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c
new file mode 100644
index 0000000..16e6f53
--- /dev/null
+++ b/sys/crypto/aesni/aesni_ghash.c
@@ -0,0 +1,523 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship from the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * $Id$
+ *
+ */
+
+/*
+ * Figure 5, 7, and 11 are copied from the Intel white paper: Intel(R)
+ * Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode
+ *
+ * and as such are: Copyright © 2010 Intel Corporation. All rights reserved.
+ *
+ * Please see white paper for complete license.
+ */
+
+#ifdef _KERNEL
+#include <crypto/aesni/aesni.h>
+#else
+#include <stdint.h>
+#endif
+
+#include <wmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */
+/*
+ * REFLECT(X): rewrites the __m128i lvalue X in place. Each byte is split
+ * into its low nibble (X & AMASK) and high nibble ((X >> 4) & AMASK); the
+ * low nibble is looked up in MASKH and the high nibble in MASKL with
+ * _mm_shuffle_epi8, and the two lookups are XORed together. With the
+ * MASKL/MASKH tables its callers define, this reverses the bit order inside
+ * every byte of X.
+ *
+ * The macro relies on hlp1, AMASK, MASKH and MASKL being declared in the
+ * calling scope. It expands to several statements without a do/while
+ * wrapper, so it must not be used as the unbraced body of an if/for.
+ */
+#define REFLECT(X) \
+ hlp1 = _mm_srli_epi16(X,4);\
+ X = _mm_and_si128(AMASK,X);\
+ hlp1 = _mm_and_si128(AMASK,hlp1);\
+ X = _mm_shuffle_epi8(MASKH,X);\
+ hlp1 = _mm_shuffle_epi8(MASKL,hlp1);\
+ X = _mm_xor_si128(X,hlp1)
+/*
+ * Compare two 128-bit vectors for equality.
+ * Returns 1 when every 32-bit lane of a equals the matching lane of b
+ * (i.e. the vectors are bitwise identical) and 0 otherwise.
+ */
+static inline int
+m128icmp(__m128i a, __m128i b)
+{
+ __m128i cmp;
+
+ cmp = _mm_cmpeq_epi32(a, b); /* equal lanes become all-ones, others zero */
+
+ return _mm_movemask_epi8(cmp) == 0xffff; /* all 16 byte sign bits set */
+}
+
+/*
+ * Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C)
+ *
+ * Carry-less multiply of a by b, folded back to 128 bits and stored in *res.
+ * The 256-bit product is built from three _mm_clmulepi64_si128 calls (low
+ * halves, high halves, and the XOR-combined halves for the middle term);
+ * the remaining shift/XOR sequence folds the upper 128 bits into the lower.
+ * NOTE(review): the folding constants (shifts by 31/30/25 and 1/2/7) are
+ * presumably the GHASH polynomial reduction from the white paper - confirm
+ * against the paper before changing them.
+ *
+ * Despite the name, the visible callers use this routine on both the
+ * encrypt and decrypt paths. res may alias a or b because both operands
+ * are passed by value.
+ */
+static void
+gfmul_decrypt(__m128i a, __m128i b, __m128i * res)
+{
+ __m128i /* tmp0, tmp1, tmp2, */ tmp3, tmp4, tmp5, tmp6,
+ tmp7, tmp8, tmp9, tmp10, tmp11, tmp12;
+ __m128i XMMMASK = _mm_setr_epi32(0xffffffff, 0x0, 0x0, 0x0);
+
+ tmp3 = _mm_clmulepi64_si128(a, b, 0x00); /* low 64 bits of a times low 64 bits of b */
+ tmp6 = _mm_clmulepi64_si128(a, b, 0x11); /* high 64 bits of a times high 64 bits of b */
+ tmp4 = _mm_shuffle_epi32(a, 78); /* 78 swaps the two 64-bit halves */
+ tmp5 = _mm_shuffle_epi32(b, 78);
+ tmp4 = _mm_xor_si128(tmp4, a);
+ tmp5 = _mm_xor_si128(tmp5, b);
+ tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00); /* (a_lo ^ a_hi) times (b_lo ^ b_hi) */
+ tmp4 = _mm_xor_si128(tmp4, tmp3);
+ tmp4 = _mm_xor_si128(tmp4, tmp6); /* tmp4 is now the middle term */
+ tmp5 = _mm_slli_si128(tmp4, 8);
+ tmp4 = _mm_srli_si128(tmp4, 8);
+ tmp3 = _mm_xor_si128(tmp3, tmp5); /* tmp3: low 128 bits of the product */
+ tmp6 = _mm_xor_si128(tmp6, tmp4); /* tmp6: high 128 bits of the product */
+ tmp7 = _mm_srli_epi32(tmp6, 31);
+ tmp8 = _mm_srli_epi32(tmp6, 30);
+ tmp9 = _mm_srli_epi32(tmp6, 25);
+ tmp7 = _mm_xor_si128(tmp7, tmp8);
+ tmp7 = _mm_xor_si128(tmp7, tmp9);
+ tmp8 = _mm_shuffle_epi32(tmp7, 147);
+/* Split the rotated carries: lane 0 goes back into tmp6, the rest into tmp3. */
+ tmp7 = _mm_and_si128(XMMMASK, tmp8);
+ tmp8 = _mm_andnot_si128(XMMMASK, tmp8);
+ tmp3 = _mm_xor_si128(tmp3, tmp8);
+ tmp6 = _mm_xor_si128(tmp6, tmp7);
+ tmp10 = _mm_slli_epi32(tmp6, 1);
+ tmp3 = _mm_xor_si128(tmp3, tmp10);
+ tmp11 = _mm_slli_epi32(tmp6, 2);
+ tmp3 = _mm_xor_si128(tmp3, tmp11);
+ tmp12 = _mm_slli_epi32(tmp6, 7);
+ tmp3 = _mm_xor_si128(tmp3, tmp12);
+/* Final fold of the high half into the low half; result goes out through res. */
+ *res = _mm_xor_si128(tmp3, tmp6);
+}
+
+/*
+ * AES-GCM encryption using AES-NI and carry-less multiplication.
+ *
+ * in: plaintext, nbytes long.
+ * out: receives nbytes of ciphertext; the visible caller passes the same
+ * buffer as in.
+ * addt: additional authenticated data, abytes long.
+ * ivec: IV, ibytes long. A 12-byte IV takes the fast path; note that this
+ * path loads a full 16 bytes from ivec before overwriting the last
+ * four, so the buffer must be at least 16 bytes long.
+ * tag: receives the 16-byte authentication tag.
+ * key: expanded encryption key schedule, read as 16-byte round keys
+ * KEY[0] .. KEY[nr] through an __m128i pointer.
+ * nr: index of the last round key.
+ *
+ * All reads of in stay within nbytes, including the trailing partial block.
+ */
+void
+AES_GCM_encrypt(const unsigned char *in,
+ unsigned char *out,
+ const unsigned char *addt,
+ const unsigned char *ivec,
+ unsigned char *tag,
+ int nbytes,
+ int abytes,
+ int ibytes,
+ const unsigned char *key,
+ int nr)
+{
+ int i , j, k;
+ __m128i hlp1 /* , hlp2, hlp3, hlp4 */ ;
+ __m128i tmp1 , tmp2, tmp3, tmp4;
+ __m128i H , T;
+ __m128i *KEY = (__m128i *) key;
+ __m128i ctr1 , ctr2, ctr3, ctr4;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+ __m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
+ __m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5,
+ 6, 7);
+ /*
+ * __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ * 10, 11, 12, 13, 14, 15);
+ */
+ __m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
+ __m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
+ __m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
+ __m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
+ __m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+ if (ibytes == 96 / 8) {
+ Y = _mm_loadu_si128((__m128i *) ivec);
+ Y = _mm_insert_epi32(Y, 0x1000000, 3);
+ /* Compute E[ZERO, KS] and E[Y0, KS] together */
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ tmp2 = _mm_xor_si128(Y, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+ REFLECT(H);
+ } else {
+ /* Other IV lengths: hash the IV with H to derive Y0. */
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ for (j = 1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ REFLECT(H);
+ Y = _mm_xor_si128(Y, Y);
+ for (i = 0; i < ibytes / 16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
+ REFLECT(tmp1);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ }
+ if (ibytes % 16) {
+ for (j = 0; j < ibytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+ REFLECT(tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ REFLECT(Y);
+ /* Compute E(K, Y0) */
+ tmp1 = _mm_xor_si128(Y, KEY[0]);
+ for (j = 1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ }
+
+ /* Fold the additional authenticated data into the hash X. */
+ for (i = 0; i < abytes / 16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ if (abytes % 16) {
+ last_block = _mm_setzero_si128();
+ for (j = 0; j < abytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = addt[i * 16 + j];
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ ctr2 = _mm_add_epi64(ctr1, ONE);
+ ctr3 = _mm_add_epi64(ctr2, ONE);
+ ctr4 = _mm_add_epi64(ctr3, ONE);
+ /* Main loop: four counter blocks per iteration. */
+ for (i = 0; i < nbytes / 16 / 4; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+ tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+ tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, FOUR);
+ ctr2 = _mm_add_epi64(ctr2, FOUR);
+ ctr3 = _mm_add_epi64(ctr3, FOUR);
+ ctr4 = _mm_add_epi64(ctr4, FOUR);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp2 = _mm_xor_si128(tmp2, KEY[0]);
+ tmp3 = _mm_xor_si128(tmp3, KEY[0]);
+ tmp4 = _mm_xor_si128(tmp4, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
+ tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
+ tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
+ tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
+ tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
+ tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
+ REFLECT(tmp1);
+ REFLECT(tmp2);
+ REFLECT(tmp3);
+ REFLECT(tmp4);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ X = _mm_xor_si128(X, tmp2);
+ gfmul_decrypt(X, H, &X);
+ X = _mm_xor_si128(X, tmp3);
+ gfmul_decrypt(X, H, &X);
+ X = _mm_xor_si128(X, tmp4);
+ gfmul_decrypt(X, H, &X);
+ }
+ /* Remaining whole blocks, one at a time. */
+ for (k = i * 4; k < nbytes / 16; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+ _mm_storeu_si128(&((__m128i *) out)[k], tmp1);
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ //If one partial block remains
+ if (nbytes % 16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ /*
+ * Gather only the nbytes % 16 valid input bytes; a full
+ * 16-byte load here would read past the end of in.
+ */
+ last_block = _mm_setzero_si128();
+ for (j = 0; j < nbytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = in[k * 16 + j];
+ last_block = _mm_xor_si128(last_block, tmp1);
+ for (j = 0; j < nbytes % 16; j++)
+ out[k * 16 + j] = ((unsigned char *)&last_block)[j];
+ /* Zero the keystream-only tail so it does not enter the hash. */
+ for (; j < 16; j++)
+ ((unsigned char *)&last_block)[j] = 0;
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ /* Final hash block: bit lengths of the ciphertext and of the AAD. */
+ tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
+ REFLECT(tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ REFLECT(X);
+ T = _mm_xor_si128(X, T);
+ _mm_storeu_si128((__m128i *) tag, T);
+}
+
+int
+AES_GCM_decrypt(const unsigned char *in,
+ unsigned char *out,
+ const unsigned char *addt,
+ const unsigned char *ivec,
+ unsigned char *tag,
+ int nbytes,
+ int abytes,
+ int ibytes,
+ const unsigned char *key,
+ int nr)
+{
+ int i , j, k;
+ __m128i hlp1 /* , hlp2, hlp3, hlp4 */ ;
+ __m128i tmp1 , tmp2, tmp3, tmp4;
+ __m128i H , T;
+ __m128i *KEY = (__m128i *) key;
+ __m128i ctr1 , ctr2, ctr3, ctr4;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+ __m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
+ __m128i BSWAP_EPI64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5,
+ 6, 7);
+ __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 14, 15);
+ __m128i X = _mm_setzero_si128(), Y = _mm_setzero_si128();
+ __m128i AMASK = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);
+ __m128i MASKL = _mm_set_epi32(0x0f070b03, 0x0d050901, 0x0e060a02, 0x0c040800);
+ __m128i MASKH = _mm_set_epi32(0xf070b030, 0xd0509010, 0xe060a020, 0xc0408000);
+ __m128i MASKF = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+ if (ibytes == 96 / 8) {
+ Y = _mm_loadu_si128((__m128i *) ivec);
+ Y = _mm_insert_epi32(Y, 0x1000000, 3);
+ /* (Compute E[ZERO, KS] and E[Y0, KS] together */
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ tmp2 = _mm_xor_si128(Y, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+ };
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+ REFLECT(H);
+ } else {
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ for (j = 1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ REFLECT(H);
+ Y = _mm_xor_si128(Y, Y);
+ for (i = 0; i < ibytes / 16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i *) ivec)[i]);
+ REFLECT(tmp1);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ }
+ if (ibytes % 16) {
+ for (j = 0; j < ibytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = ivec[i * 16 + j];
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, ibytes * 8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+ REFLECT(tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul_decrypt(Y, H, &Y);
+ REFLECT(Y);
+ /* Compute E(K, Y0) */
+ tmp1 = _mm_xor_si128(Y, KEY[0]);
+ for (j = 1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ }
+ for (i = 0; i < abytes / 16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i *) addt)[i]);
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ if (abytes % 16) {
+ last_block = _mm_setzero_si128();
+ for (j = 0; j < abytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = addt[i * 16 + j];
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ for (i = 0; i < nbytes / 16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i *) in)[i]);
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ if (nbytes % 16) {
+ last_block = _mm_setzero_si128();
+ for (j = 0; j < nbytes % 16; j++)
+ ((unsigned char *)&last_block)[j] = in[i * 16 + j];
+ tmp1 = last_block;
+ REFLECT(tmp1);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, nbytes * 8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, abytes * 8, 1);
+ REFLECT(tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, MASKF);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul_decrypt(X, H, &X);
+ REFLECT(X);
+ T = _mm_xor_si128(X, T);
+ if (!m128icmp(T, _mm_loadu_si128((__m128i*)tag)))
+ return 0; //in case the authentication failed
+
+ //authentication succeeded: decrypt the ciphertext in counter mode
+ ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ ctr2 = _mm_add_epi64(ctr1, ONE);
+ ctr3 = _mm_add_epi64(ctr2, ONE);
+ ctr4 = _mm_add_epi64(ctr3, ONE);
+ for (i = 0; i < nbytes / 16 / 4; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+ tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+ tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, FOUR);
+ ctr2 = _mm_add_epi64(ctr2, FOUR);
+ ctr3 = _mm_add_epi64(ctr3, FOUR);
+ ctr4 = _mm_add_epi64(ctr4, FOUR);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp2 = _mm_xor_si128(tmp2, KEY[0]);
+ tmp3 = _mm_xor_si128(tmp3, KEY[0]);
+ tmp4 = _mm_xor_si128(tmp4, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j + 1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j + 1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr - 1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[nr - 1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
+ tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
+ tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 0]));
+ tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 1]));
+ tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 2]));
+ tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i *) in)[i * 4 + 3]));
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 0], tmp1);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 1], tmp2);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 2], tmp3);
+ _mm_storeu_si128(&((__m128i *) out)[i * 4 + 3], tmp4);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+ }
+ for (k = i * 4; k < nbytes / 16; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+ _mm_storeu_si128(&((__m128i *) out)[k], tmp1);
+ }
+ //If one partial block remains
+ if (nbytes % 16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j = 1; j < nr - 1; j += 2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j + 1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr - 1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i *) in)[k]));
+ last_block = tmp1;
+ for (j = 0; j < nbytes % 16; j++)
+ out[k * 16 + j] = ((unsigned char *)&last_block)[j];
+ }
+ return 1;
+ //returns 1 on success
+}
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 39819a6..83d79fc 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -176,10 +176,6 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
}
}
-#define AES_XTS_BLOCKSIZE 16
-#define AES_XTS_IVSIZE 8
-#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
-
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
@@ -347,6 +343,23 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
ses->rounds = AES256_ROUNDS;
break;
default:
+ printf("invalid CBC/GCM key length");
+ return (EINVAL);
+ }
+ break;
+ case CRYPTO_AES_RFC4106_GCM_16:
+ switch (keylen) {
+ case 160:
+ ses->rounds = AES128_ROUNDS;
+ break;
+ case 224:
+ ses->rounds = AES192_ROUNDS;
+ break;
+ case 288:
+ ses->rounds = AES256_ROUNDS;
+ break;
+ default:
+ printf("invalid CBC/GCM key length");
return (EINVAL);
}
break;
@@ -359,6 +372,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
ses->rounds = AES256_ROUNDS;
break;
default:
+ printf("invalid XTS key length");
return (EINVAL);
}
break;
@@ -368,11 +382,20 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
- if (ses->algo == CRYPTO_AES_CBC)
- arc4rand(ses->iv, sizeof(ses->iv), 0);
- else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+
+ /* setup IV */
+ switch (ses->algo) {
+ case CRYPTO_AES_CBC:
+ /* Nothing to do */
+ break;
+ case CRYPTO_AES_RFC4106_GCM_16:
+ bcopy(key + ((keylen - 32) / 8), ses->nonce, AESCTR_NONCESIZE);
+ arc4rand((void *)&ses->aesgcmcounter, sizeof(uint64_t), 0);
+ break;
+ case CRYPTO_AES_XTS:
aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
ses->rounds);
+ break;
}
return (0);
diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile
index 26dbedc..e66f941 100644
--- a/sys/modules/aesni/Makefile
+++ b/sys/modules/aesni/Makefile
@@ -15,5 +15,13 @@ aesni_wrap.o: aesni_wrap.c
-mmmx -msse -maes ${.IMPSRC}
${CTFCONVERT_CMD}
+.if ${MACHINE_CPUARCH} == "amd64"
+OBJS+= aesni_ghash.o
+aesni_ghash.o: aesni_ghash.c
+ ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \
+ -mmmx -mpclmul -msse -msse4.1 -maes ${.IMPSRC}
+ ${CTFCONVERT_CMD}
+.endif
+
.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud