diff options
Diffstat (limited to 'sys/crypto')
-rw-r--r-- | sys/crypto/aesni/aesencdec.h | 24 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni.c | 353 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni.h | 15 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni_ghash.c | 804 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni_os.h | 33 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni_wrap.c | 129 | ||||
-rw-r--r-- | sys/crypto/rijndael/rijndael-api-fst.c | 19 | ||||
-rw-r--r-- | sys/crypto/rijndael/rijndael-api-fst.h | 18 | ||||
-rw-r--r-- | sys/crypto/sha2/sha2.c | 348 | ||||
-rw-r--r-- | sys/crypto/sha2/sha2.h | 59 | ||||
-rw-r--r-- | sys/crypto/sha2/sha256.h | 86 | ||||
-rw-r--r-- | sys/crypto/sha2/sha256c.c | 316 | ||||
-rw-r--r-- | sys/crypto/via/padlock_hash.c | 12 |
13 files changed, 1708 insertions, 508 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h index 5e4f128..80951a4 100644 --- a/sys/crypto/aesni/aesencdec.h +++ b/sys/crypto/aesni/aesencdec.h @@ -1,5 +1,6 @@ /*- * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,11 @@ * */ +#ifndef _AESENCDEC_H_ +#define _AESENCDEC_H_ + +#include <crypto/aesni/aesni_os.h> + #include <wmmintrin.h> static inline void @@ -103,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a, out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]); } +/* rounds is passed in as rounds - 1 */ static inline __m128i aesni_enc(int rounds, const __m128i *keysched, const __m128i from) { @@ -110,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from) int i; tmp = from ^ keysched[0]; - - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesenc_si128(tmp, keysched[i]); tmp = _mm_aesenc_si128(tmp, keysched[i + 1]); + } - return _mm_aesenclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesenc_si128(tmp, keysched[rounds]); + return _mm_aesenclast_si128(tmp, keysched[rounds + 1]); } static inline __m128i @@ -125,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from) tmp = from ^ keysched[0]; - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesdec_si128(tmp, keysched[i]); tmp = _mm_aesdec_si128(tmp, keysched[i + 1]); + } - return _mm_aesdeclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesdec_si128(tmp, keysched[rounds]); + return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]); } + +#endif /* _AESENCDEC_H_ */ diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c index 7d7a740..602247a 100644 --- a/sys/crypto/aesni/aesni.c +++ b/sys/crypto/aesni/aesni.c @@ -1,8 +1,13 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek <pjd@FreeBSD.org> * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -39,16 +44,37 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/bus.h> #include <sys/uio.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/smp.h> #include <crypto/aesni/aesni.h> #include <cryptodev_if.h> +#include <opencrypto/gmac.h> + +static struct mtx_padalign *ctx_mtx; +static struct fpu_kern_ctx **ctx_fpu; struct aesni_softc { + int dieing; int32_t cid; uint32_t sid; TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions; struct rwlock lock; }; +#define AQUIRE_CTX(i, ctx) \ + do { \ + (i) = PCPU_GET(cpuid); \ + mtx_lock(&ctx_mtx[(i)]); \ + (ctx) = ctx_fpu[(i)]; \ + } while (0) +#define RELEASE_CTX(i, ctx) \ + do { \ + mtx_unlock(&ctx_mtx[(i)]); \ + (i) = -1; \ + (ctx) = NULL; \ + } while (0) + static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); static int aesni_freesession(device_t, uint64_t tid); static void aesni_freesession_locked(struct aesni_softc *sc, @@ -56,7 +82,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc, static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini); static int aesni_cipher_process(struct aesni_session *ses, - struct cryptodesc *enccrd, struct cryptop *crp); + struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp); MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); @@ -79,23 +105,45 @@ aesni_probe(device_t dev) return (EINVAL); } - if ((cpu_feature & CPUID_SSE2) == 0) { - device_printf(dev, "No SSE2 support but AESNI!?!\n"); + if ((cpu_feature2 & CPUID2_SSE41) == 0) { + device_printf(dev, "No SSE4.1 support.\n"); return (EINVAL); } - device_set_desc_copy(dev, "AES-CBC,AES-XTS"); + device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM"); return (0); } +static void +aensi_cleanctx(void) +{ + int i; + + /* XXX - no way to return driverid */ + CPU_FOREACH(i) { + if (ctx_fpu[i] != NULL) { + mtx_destroy(&ctx_mtx[i]); + fpu_kern_free_ctx(ctx_fpu[i]); + } + ctx_fpu[i] = NULL; + } + free(ctx_mtx, M_AESNI); + ctx_mtx = NULL; + free(ctx_fpu, M_AESNI); + ctx_fpu = NULL; +} + static int aesni_attach(device_t dev) { struct aesni_softc *sc; + int i; sc = device_get_softc(dev); + sc->dieing = 0; TAILQ_INIT(&sc->sessions); sc->sid = 1; + sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SYNC); if (sc->cid < 0) { @@ -103,8 +151,23 @@ aesni_attach(device_t dev) return (ENOMEM); } + ctx_mtx = malloc(sizeof *ctx_mtx * (mp_maxid + 1), M_AESNI, + M_WAITOK|M_ZERO); + ctx_fpu = malloc(sizeof *ctx_fpu * (mp_maxid + 1), M_AESNI, + M_WAITOK|M_ZERO); + + CPU_FOREACH(i) { + ctx_fpu[i] = fpu_kern_alloc_ctx(0); + mtx_init(&ctx_mtx[i], "anifpumtx", NULL, MTX_DEF); + } + rw_init(&sc->lock, "aesni_lock"); crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0); return (0); } @@ -116,6 +179,7 @@ aesni_detach(device_t dev) struct aesni_session *ses; sc = device_get_softc(dev); + rw_wlock(&sc->lock); TAILQ_FOREACH(ses, &sc->sessions, next) { if (ses->used) { @@ -125,14 +189,18 @@ aesni_detach(device_t dev) return (EBUSY); } } + sc->dieing = 1; while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); - fpu_kern_free_ctx(ses->fpu_ctx); free(ses, M_AESNI); } rw_wunlock(&sc->lock); - rw_destroy(&sc->lock); crypto_unregister_all(sc->cid); + + rw_destroy(&sc->lock); + + aensi_cleanctx(); + return (0); } @@ -144,28 +212,52 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) struct cryptoini *encini; int error; - if (sidp == NULL || cri == NULL) + if (sidp == NULL || cri == NULL) { + CRYPTDEB("no sidp or cri"); return (EINVAL); + } sc = device_get_softc(dev); + if (sc->dieing) + return (EINVAL); + ses = NULL; encini = NULL; for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: - if (encini != NULL) + case CRYPTO_AES_NIST_GCM_16: + if (encini != NULL) { + CRYPTDEB("encini already set"); return (EINVAL); + } encini = cri; break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + /* + * nothing to do here, maybe in the future cache some + * values for GHASH + */ + break; default: + CRYPTDEB("unhandled algorithm"); return (EINVAL); } } - if (encini == NULL) + if (encini == NULL) { + CRYPTDEB("no cipher"); return (EINVAL); + } rw_wlock(&sc->lock); + if (sc->dieing) { + rw_wunlock(&sc->lock); + return (EINVAL); + } /* * Free sessions goes first, so if first session is used, we need to * allocate one. @@ -177,13 +269,6 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) rw_wunlock(&sc->lock); return (ENOMEM); } - ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | - FPU_KERN_NOWAIT); - if (ses->fpu_ctx == NULL) { - free(ses, M_AESNI); - rw_wunlock(&sc->lock); - return (ENOMEM); - } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); @@ -195,6 +280,7 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) error = aesni_cipher_setup(ses, encini); if (error != 0) { + CRYPTDEB("setup failed"); rw_wlock(&sc->lock); aesni_freesession_locked(sc, ses); rw_wunlock(&sc->lock); @@ -208,15 +294,14 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) { - struct fpu_kern_ctx *ctx; uint32_t sid; + rw_assert(&sc->lock, RA_WLOCKED); + sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); - ctx = ses->fpu_ctx; - bzero(ses, sizeof(*ses)); + *ses = (struct aesni_session){}; ses->id = sid; - ses->fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } @@ -248,11 +333,13 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) { struct aesni_softc *sc = device_get_softc(dev); struct aesni_session *ses = NULL; - struct cryptodesc *crd, *enccrd; - int error; + struct cryptodesc *crd, *enccrd, *authcrd; + int error, needauth; error = 0; enccrd = NULL; + authcrd = NULL; + needauth = 0; /* Sanity check. */ if (crp == NULL) @@ -266,6 +353,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: if (enccrd != NULL) { error = EINVAL; @@ -273,11 +361,41 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) } enccrd = crd; break; + + case CRYPTO_AES_NIST_GCM_16: + if (enccrd != NULL) { + error = EINVAL; + goto out; + } + enccrd = crd; + needauth = 1; + break; + + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + if (authcrd != NULL) { + error = EINVAL; + goto out; + } + authcrd = crd; + needauth = 1; + break; + default: - return (EINVAL); + error = EINVAL; + goto out; } } - if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) { + + if (enccrd == NULL || (needauth && authcrd == NULL)) { + error = EINVAL; + goto out; + } + + /* CBC & XTS can only handle full blocks for now */ + if ((enccrd->crd_alg == CRYPTO_AES_CBC || enccrd->crd_alg == + CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } @@ -293,7 +411,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) goto out; } - error = aesni_cipher_process(ses, enccrd, crp); + error = aesni_cipher_process(ses, enccrd, authcrd, crp); if (error != 0) goto out; @@ -307,21 +425,26 @@ uint8_t * aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, int *allocated) { + struct mbuf *m; struct uio *uio; struct iovec *iov; uint8_t *addr; - if (crp->crp_flags & CRYPTO_F_IMBUF) - goto alloc; - else if (crp->crp_flags & CRYPTO_F_IOV) { + if (crp->crp_flags & CRYPTO_F_IMBUF) { + m = (struct mbuf *)crp->crp_buf; + if (m->m_next != NULL) + goto alloc; + addr = mtod(m, uint8_t *); + } else if (crp->crp_flags & CRYPTO_F_IOV) { uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; - addr = (u_char *)iov->iov_base + enccrd->crd_skip; + addr = (uint8_t *)iov->iov_base; } else - addr = (u_char *)crp->crp_buf; + addr = (uint8_t *)crp->crp_buf; *allocated = 0; + addr += enccrd->crd_skip; return (addr); alloc: @@ -362,37 +485,74 @@ MODULE_DEPEND(aesni, crypto, 1, 1, 1); static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) { - struct thread *td; + struct fpu_kern_ctx *ctx; int error; + int kt, ctxidx; + + kt = is_fpu_kern_thread(0); + if (!kt) { + AQUIRE_CTX(ctxidx, ctx); + error = fpu_kern_enter(curthread, ctx, + FPU_KERN_NORMAL | FPU_KERN_KTHR); + if (error != 0) + goto out; + } - td = curthread; - error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - return (error); error = aesni_cipher_setup_common(ses, encini->cri_key, encini->cri_klen); - fpu_kern_leave(td, ses->fpu_ctx); + + if (!kt) { + fpu_kern_leave(curthread, ctx); +out: + RELEASE_CTX(ctxidx, ctx); + } return (error); } +/* + * authcrd contains the associated date. + */ static int aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, - struct cryptop *crp) + struct cryptodesc *authcrd, struct cryptop *crp) { - struct thread *td; - uint8_t *buf; - int error, allocated; + struct fpu_kern_ctx *ctx; + uint8_t iv[AES_BLOCK_LEN]; + uint8_t tag[GMAC_DIGEST_LEN]; + uint8_t *buf, *authbuf; + int error, allocated, authallocated; + int ivlen, encflag; + int kt, ctxidx; + + encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT; + + if ((enccrd->crd_alg == CRYPTO_AES_ICM || + enccrd->crd_alg == CRYPTO_AES_NIST_GCM_16) && + (enccrd->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); buf = aesni_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); - td = curthread; - error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - goto out1; + authbuf = NULL; + authallocated = 0; + if (authcrd != NULL) { + authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated); + if (authbuf == NULL) { + error = ENOMEM; + goto out1; + } + } + + kt = is_fpu_kern_thread(0); + if (!kt) { + AQUIRE_CTX(ctxidx, ctx); + error = fpu_kern_enter(curthread, ctx, + FPU_KERN_NORMAL|FPU_KERN_KTHR); + if (error != 0) + goto out2; + } if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { error = aesni_cipher_setup_common(ses, enccrd->crd_key, @@ -401,48 +561,105 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, goto out; } - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { + /* XXX - validate that enccrd and authcrd have/use same key? */ + switch (enccrd->crd_alg) { + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + ivlen = AES_BLOCK_LEN; + break; + case CRYPTO_AES_XTS: + ivlen = 8; + break; + case CRYPTO_AES_NIST_GCM_16: + ivlen = 12; /* should support arbitarily larger */ + break; + } + + /* Setup iv */ + if (encflag) { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + bcopy(enccrd->crd_iv, iv, ivlen); + else + arc4rand(iv, ivlen, 0); + if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) crypto_copyback(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { - aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, - enccrd->crd_len, buf, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { - aesni_encrypt_xts(ses->rounds, ses->enc_schedule, - ses->xts_schedule, enccrd->crd_len, buf, buf, - ses->iv); - } + enccrd->crd_inject, ivlen, iv); } else { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + bcopy(enccrd->crd_iv, iv, ivlen); else crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { + enccrd->crd_inject, ivlen, iv); + } + + if (authcrd != NULL && !encflag) + crypto_copydata(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + else + bzero(tag, sizeof tag); + + /* Do work */ + switch (ses->algo) { + case CRYPTO_AES_CBC: + if (encflag) + aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, iv); + else aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, - enccrd->crd_len, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { + enccrd->crd_len, buf, iv); + break; + case CRYPTO_AES_ICM: + /* encryption & decryption are the same */ + aesni_encrypt_icm(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, iv); + break; + case CRYPTO_AES_XTS: + if (encflag) + aesni_encrypt_xts(ses->rounds, ses->enc_schedule, + ses->xts_schedule, enccrd->crd_len, buf, buf, + iv); + else aesni_decrypt_xts(ses->rounds, ses->dec_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, - ses->iv); + iv); + break; + case CRYPTO_AES_NIST_GCM_16: + if (encflag) + AES_GCM_encrypt(buf, buf, authbuf, iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds); + else { + if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds)) + error = EBADMSG; } + break; } + if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) - crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, - AES_BLOCK_LEN, ses->iv); + + if (!error && authcrd != NULL) { + crypto_copyback(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + } + out: - fpu_kern_leave(td, ses->fpu_ctx); + if (!kt) { + fpu_kern_leave(curthread, ctx); +out2: + RELEASE_CTX(ctxidx, ctx); + } + out1: if (allocated) { bzero(buf, enccrd->crd_len); free(buf, M_AESNI); } + if (authallocated) + free(authbuf, M_AESNI); return (error); } diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h index ff1d1a2..3d5adec 100644 --- a/sys/crypto/aesni/aesni.h +++ b/sys/crypto/aesni/aesni.h @@ -56,7 +56,6 @@ struct aesni_session { uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16); uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16); uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16); - uint8_t iv[AES_BLOCK_LEN]; int algo; int rounds; /* uint8_t *ses_ictx; */ @@ -65,7 +64,6 @@ struct aesni_session { int used; uint32_t id; TAILQ_ENTRY(aesni_session) next; - struct fpu_kern_ctx *fpu_ctx; }; /* @@ -88,6 +86,9 @@ void aesni_encrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); void aesni_decrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); +void aesni_encrypt_icm(int rounds, const void *key_schedule /*__aligned(16)*/, + size_t len, const uint8_t *from, uint8_t *to, + const uint8_t iv[AES_BLOCK_LEN]); void aesni_encrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, @@ -96,6 +97,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); +/* GCM & GHASH functions */ +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); +int AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); + int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen); uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c new file mode 100644 index 0000000..54c8815 --- /dev/null +++ b/sys/crypto/aesni/aesni_ghash.c @@ -0,0 +1,804 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * $FreeBSD$ + * + */ + +/* + * Figure 5, 8 and 12 are copied from the Intel white paper: + * Intel® Carry-Less Multiplication Instruction and its Usage for + * Computing the GCM Mode + * + * and as such are: + * Copyright © 2010 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include <crypto/aesni/aesni.h> +#include <crypto/aesni/aesni_os.h> +#else +#include <stdint.h> +#endif + +#include <wmmintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> + +static inline int +m128icmp(__m128i a, __m128i b) +{ + __m128i cmp; + + cmp = _mm_cmpeq_epi32(a, b); + + return _mm_movemask_epi8(cmp) == 0xffff; +} + +#ifdef __i386__ +static inline __m128i +_mm_insert_epi64(__m128i a, int64_t b, const int ndx) +{ + + if (!ndx) { + a = _mm_insert_epi32(a, b, 0); + a = _mm_insert_epi32(a, b >> 32, 1); + } else { + a = _mm_insert_epi32(a, b, 2); + a = _mm_insert_epi32(a, b >> 32, 3); + } + + return a; +} +#endif + +/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */ + +/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */ +static void +gfmul(__m128i a, __m128i b, __m128i *res) +{ + __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp3 = _mm_clmulepi64_si128(a, b, 0x00); + tmp4 = _mm_clmulepi64_si128(a, b, 0x10); + tmp5 = _mm_clmulepi64_si128(a, b, 0x01); + tmp6 = _mm_clmulepi64_si128(a, b, 0x11); + + tmp4 = _mm_xor_si128(tmp4, tmp5); + tmp5 = _mm_slli_si128(tmp4, 8); + tmp4 = _mm_srli_si128(tmp4, 8); + tmp3 = _mm_xor_si128(tmp3, tmp5); + tmp6 = _mm_xor_si128(tmp6, tmp4); + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 8. Code Sample - Performing Ghash Using an Aggregated Reduction + * Method */ +static void +reduce4(__m128i H1, __m128i H2, __m128i H3, __m128i H4, + __m128i X1, __m128i X2, __m128i X3, __m128i X4, __m128i *res) +{ + /*algorithm by Krzysztof Jankowski, Pierre Laurent - Intel*/ + __m128i H1_X1_lo, H1_X1_hi, H2_X2_lo, H2_X2_hi, H3_X3_lo, + H3_X3_hi, H4_X4_lo, H4_X4_hi, lo, hi; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i tmp4, tmp5, tmp6, tmp7; + __m128i tmp8, tmp9; + + H1_X1_lo = _mm_clmulepi64_si128(H1, X1, 0x00); + H2_X2_lo = _mm_clmulepi64_si128(H2, X2, 0x00); + H3_X3_lo = _mm_clmulepi64_si128(H3, X3, 0x00); + H4_X4_lo = _mm_clmulepi64_si128(H4, X4, 0x00); + + lo = _mm_xor_si128(H1_X1_lo, H2_X2_lo); + lo = _mm_xor_si128(lo, H3_X3_lo); + lo = _mm_xor_si128(lo, H4_X4_lo); + + H1_X1_hi = _mm_clmulepi64_si128(H1, X1, 0x11); + H2_X2_hi = _mm_clmulepi64_si128(H2, X2, 0x11); + H3_X3_hi = _mm_clmulepi64_si128(H3, X3, 0x11); + H4_X4_hi = _mm_clmulepi64_si128(H4, X4, 0x11); + + hi = _mm_xor_si128(H1_X1_hi, H2_X2_hi); + hi = _mm_xor_si128(hi, H3_X3_hi); + hi = _mm_xor_si128(hi, H4_X4_hi); + + tmp0 = _mm_shuffle_epi32(H1, 78); + tmp4 = _mm_shuffle_epi32(X1, 78); + tmp0 = _mm_xor_si128(tmp0, H1); + tmp4 = _mm_xor_si128(tmp4, X1); + tmp1 = _mm_shuffle_epi32(H2, 78); + tmp5 = _mm_shuffle_epi32(X2, 78); + tmp1 = _mm_xor_si128(tmp1, H2); + tmp5 = _mm_xor_si128(tmp5, X2); + tmp2 = _mm_shuffle_epi32(H3, 78); + tmp6 = _mm_shuffle_epi32(X3, 78); + tmp2 = _mm_xor_si128(tmp2, H3); + tmp6 = _mm_xor_si128(tmp6, X3); + tmp3 = _mm_shuffle_epi32(H4, 78); + tmp7 = _mm_shuffle_epi32(X4, 78); + tmp3 = _mm_xor_si128(tmp3, H4); + tmp7 = _mm_xor_si128(tmp7, X4); + + tmp0 = _mm_clmulepi64_si128(tmp0, tmp4, 0x00); + tmp1 = _mm_clmulepi64_si128(tmp1, tmp5, 0x00); + tmp2 = _mm_clmulepi64_si128(tmp2, tmp6, 0x00); + tmp3 = _mm_clmulepi64_si128(tmp3, tmp7, 0x00); + + tmp0 = _mm_xor_si128(tmp0, lo); + tmp0 = _mm_xor_si128(tmp0, hi); + tmp0 = _mm_xor_si128(tmp1, tmp0); + tmp0 = _mm_xor_si128(tmp2, tmp0); + tmp0 = _mm_xor_si128(tmp3, tmp0); + + tmp4 = _mm_slli_si128(tmp0, 8); + tmp0 = _mm_srli_si128(tmp0, 8); + + lo = _mm_xor_si128(tmp4, lo); + hi = _mm_xor_si128(tmp0, hi); + + tmp3 = lo; + tmp6 = hi; + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 12. AES-GCM: Processing Four Blocks in Parallel with Aggregated + * Every Four Blocks + */ +/* + * per NIST SP-800-38D, 5.2.1.1, len(p) <= 2^39-256 (in bits), or + * 2^32-256*8*16 bytes. + */ +void +AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + + tmp5 = _mm_xor_si128(X, tmp5); + reduce4(H, H2, H3, H4, tmp8, tmp7, tmp6, tmp5, &X); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + for ((void)j; j<16; j++) + ((unsigned char*)&last_block)[j] = 0; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + _mm_storeu_si128((__m128i*)tag, T); +} + +/* My modification of _encrypt to be _decrypt */ +int +AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + /* This is where we validate the cipher text before decrypt */ + for (i = 0; i<nbytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)in)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)in)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)in)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i = i*4; i<nbytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + if (nbytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<nbytes%16; j++) + ((unsigned char*)&last_block)[j] = in[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + + if (!m128icmp(T, _mm_loadu_si128((const __m128i*)tag))) + return 0; //in case the authentication failed + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + } + return 1; //when sucessfull returns 1 +} diff --git a/sys/crypto/aesni/aesni_os.h b/sys/crypto/aesni/aesni_os.h new file mode 100644 index 0000000..273c3f9 --- /dev/null +++ b/sys/crypto/aesni/aesni_os.h @@ -0,0 +1,33 @@ +/*- + * Copyright 2015 Craig Rodrigues <rodrigc@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#if defined(__GNUC__) && defined(_KERNEL) +/* Suppress inclusion of gcc's mm_malloc.h header */ +#define _MM_MALLOC_H_INCLUDED 1 +#endif diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c index 39819a6..e5e2a69 100644 --- a/sys/crypto/aesni/aesni_wrap.c +++ b/sys/crypto/aesni/aesni_wrap.c @@ -3,8 +3,13 @@ * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -29,15 +34,18 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); - + #include <sys/param.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/proc.h> #include <sys/systm.h> #include <crypto/aesni/aesni.h> - + +#include <opencrypto/gmac.h> + #include "aesencdec.h" +#include <smmintrin.h> MALLOC_DECLARE(M_AESNI); @@ -176,6 +184,104 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, } } +/* + * mixed endian increment, low 64bits stored in hi word to be compatible + * with _icm's BSWAP. + */ +static inline __m128i +nextc(__m128i x) +{ + const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0); + const __m128i ZERO = _mm_setzero_si128(); + + x = _mm_add_epi64(x, ONE); + __m128i t = _mm_cmpeq_epi64(x, ZERO); + t = _mm_unpackhi_epi64(t, ZERO); + x = _mm_sub_epi64(x, t); + + return x; +} + +void +aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) +{ + __m128i tot; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i BSWAP_EPI64; + __m128i tout[8]; + struct blocks8 *top; + const struct blocks8 *blks; + size_t i, cnt; + + BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); + + ctr1 = _mm_loadu_si128((__m128i*)iv); + ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr2 = nextc(ctr1); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + ctr3 = nextc(ctr2); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + ctr4 = nextc(ctr3); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + ctr5 = nextc(ctr4); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + ctr6 = nextc(ctr5); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + ctr7 = nextc(ctr6); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + ctr8 = nextc(ctr7); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + ctr1 = nextc(ctr8); + + blks = (const struct blocks8 *)from; + top = (struct blocks8 *)to; + aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4, + tmp5, tmp6, tmp7, tmp8, tout); + + top->blk[0] = blks->blk[0] ^ tout[0]; + top->blk[1] = blks->blk[1] ^ tout[1]; + top->blk[2] = blks->blk[2] ^ tout[2]; + top->blk[3] = blks->blk[3] ^ tout[3]; + top->blk[4] = blks->blk[4] ^ tout[4]; + top->blk[5] = blks->blk[5] ^ tout[5]; + top->blk[6] = blks->blk[6] ^ tout[6]; + top->blk[7] = blks->blk[7] ^ tout[7]; + + from += AES_BLOCK_LEN * 8; + to += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = nextc(ctr1); + + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + _mm_storeu_si128((__m128i *)to, tot); + + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } + + /* handle remaining partial round */ + if (len % AES_BLOCK_LEN != 0) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + memcpy(to, &tot, len % AES_BLOCK_LEN); + } +} + #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -333,8 +439,15 @@ int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen) { + int decsched; + + decsched = 1; switch (ses->algo) { + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + decsched = 0; + /* FALLTHROUGH */ case CRYPTO_AES_CBC: switch (keylen) { case 128: @@ -347,6 +460,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid CBC/ICM/GCM key length"); return (EINVAL); } break; @@ -359,6 +473,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid XTS key length"); return (EINVAL); } break; @@ -367,13 +482,13 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, } aesni_set_enckey(key, ses->enc_schedule, ses->rounds); - aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds); - if (ses->algo == CRYPTO_AES_CBC) - arc4rand(ses->iv, sizeof(ses->iv), 0); - else /* if (ses->algo == CRYPTO_AES_XTS) */ { + if (decsched) + aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, + ses->rounds); + + if (ses->algo == CRYPTO_AES_XTS) aesni_set_enckey(key + keylen / 16, ses->xts_schedule, ses->rounds); - } return (0); } diff --git a/sys/crypto/rijndael/rijndael-api-fst.c b/sys/crypto/rijndael/rijndael-api-fst.c index 187177b..bf7b4d1 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.c +++ b/sys/crypto/rijndael/rijndael-api-fst.c @@ -34,7 +34,8 @@ __FBSDID("$FreeBSD$"); typedef u_int8_t BYTE; -int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) { +int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, + const char *keyMaterial) { u_int8_t cipherKey[RIJNDAEL_MAXKB]; if (key == NULL) { @@ -83,7 +84,7 @@ int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) { } int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -198,7 +199,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, * @return length in octets (not bits) of the encrypted output buffer. */ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16], *iv, *cp; @@ -232,10 +233,10 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, case MODE_CBC: iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - ((u_int32_t*)block)[0] = ((u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; - ((u_int32_t*)block)[1] = ((u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; - ((u_int32_t*)block)[2] = ((u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; - ((u_int32_t*)block)[3] = ((u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; + ((u_int32_t*)block)[0] = ((const u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; + ((u_int32_t*)block)[1] = ((const u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; + ((u_int32_t*)block)[2] = ((const u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; + ((u_int32_t*)block)[3] = ((const u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); iv = outBuffer; input += 16; @@ -261,7 +262,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -360,7 +361,7 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16]; u_int32_t iv[4]; diff --git a/sys/crypto/rijndael/rijndael-api-fst.h b/sys/crypto/rijndael/rijndael-api-fst.h index 122bf52..e5f596a 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.h +++ b/sys/crypto/rijndael/rijndael-api-fst.h @@ -56,18 +56,18 @@ typedef struct { /* changed order of the components */ /* Function prototypes */ -int rijndael_makeKey(keyInstance *, u_int8_t, int, char *); +int rijndael_makeKey(keyInstance *, u_int8_t, int, const char *); int rijndael_cipherInit(cipherInstance *, u_int8_t, char *); -int rijndael_blockEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); +int rijndael_padEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); -int rijndael_blockDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockDecrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); +int rijndael_padDecrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); #endif /* __RIJNDAEL_API_FST_H */ diff --git a/sys/crypto/sha2/sha2.c b/sys/crypto/sha2/sha2.c index 66ec6e9..5bb52f3 100644 --- a/sys/crypto/sha2/sha2.c +++ b/sys/crypto/sha2/sha2.c @@ -121,20 +121,10 @@ __FBSDID("$FreeBSD$"); * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t * types and pointing out recent ANSI C support for uintXX_t in inttypes.h. */ -#if 0 /*def SHA2_USE_INTTYPES_H*/ - typedef uint8_t sha2_byte; /* Exactly 1 byte */ typedef uint32_t sha2_word32; /* Exactly 4 bytes */ typedef uint64_t sha2_word64; /* Exactly 8 bytes */ -#else /* SHA2_USE_INTTYPES_H */ - -typedef u_int8_t sha2_byte; /* Exactly 1 byte */ -typedef u_int32_t sha2_word32; /* Exactly 4 bytes */ -typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ - -#endif /* SHA2_USE_INTTYPES_H */ - /*** SHA-256/384/512 Various Length Definitions ***********************/ /* NOTE: Most of these are in sha2.h */ @@ -183,8 +173,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ */ /* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ #define R(b,x) ((x) >> (b)) -/* 32-bit Rotate-right (used in SHA-256): */ -#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) /* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ #define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) @@ -192,12 +180,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ #define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) -/* Four of six logical functions used in SHA-256: */ -#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) -#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) -#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) -#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) - /* Four of six logical functions used in SHA-384 and SHA-512: */ #define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) #define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) @@ -210,43 +192,10 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ * only. */ static void SHA512_Last(SHA512_CTX*); -static void SHA256_Transform(SHA256_CTX*, const sha2_word32*); static void SHA512_Transform(SHA512_CTX*, const sha2_word64*); /*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ -/* Hash constant words K for SHA-256: */ -static const sha2_word32 K256[64] = { - 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, - 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, - 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, - 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, - 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, - 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, - 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, - 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, - 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, - 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, - 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, - 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, - 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, - 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, - 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, - 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL -}; - -/* Initial hash value H for SHA-256: */ -static const sha2_word32 sha256_initial_hash_value[8] = { - 0x6a09e667UL, - 0xbb67ae85UL, - 0x3c6ef372UL, - 0xa54ff53aUL, - 0x510e527fUL, - 0x9b05688cUL, - 0x1f83d9abUL, - 0x5be0cd19UL -}; - /* Hash constant words K for SHA-384 and SHA-512: */ static const sha2_word64 K512[80] = { 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, @@ -323,301 +272,6 @@ static const char *sha2_hex_digits = "0123456789abcdef"; /*** SHA-256: *********************************************************/ -void SHA256_Init(SHA256_CTX* context) { - if (context == (SHA256_CTX*)0) { - return; - } - bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH); - bzero(context->buffer, SHA256_BLOCK_LENGTH); - context->bitcount = 0; -} - -#ifdef SHA2_UNROLL_TRANSFORM - -/* Unrolled SHA-256 round macros: */ - -#if BYTE_ORDER == LITTLE_ENDIAN - -#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ - REVERSE32(*data++, W256[j]); \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ - K256[j] + W256[j]; \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - - -#else /* BYTE_ORDER == LITTLE_ENDIAN */ - -#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ - K256[j] + (W256[j] = *data++); \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - -#endif /* BYTE_ORDER == LITTLE_ENDIAN */ - -#define ROUND256(a,b,c,d,e,f,g,h) \ - s0 = W256[(j+1)&0x0f]; \ - s0 = sigma0_256(s0); \ - s1 = W256[(j+14)&0x0f]; \ - s1 = sigma1_256(s1); \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \ - (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - -static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { - sha2_word32 a, b, c, d, e, f, g, h, s0, s1; - sha2_word32 T1, *W256; - int j; - - W256 = (sha2_word32*)context->buffer; - - /* Initialize registers with the prev. intermediate value */ - a = context->state[0]; - b = context->state[1]; - c = context->state[2]; - d = context->state[3]; - e = context->state[4]; - f = context->state[5]; - g = context->state[6]; - h = context->state[7]; - - j = 0; - do { - /* Rounds 0 to 15 (unrolled): */ - ROUND256_0_TO_15(a,b,c,d,e,f,g,h); - ROUND256_0_TO_15(h,a,b,c,d,e,f,g); - ROUND256_0_TO_15(g,h,a,b,c,d,e,f); - ROUND256_0_TO_15(f,g,h,a,b,c,d,e); - ROUND256_0_TO_15(e,f,g,h,a,b,c,d); - ROUND256_0_TO_15(d,e,f,g,h,a,b,c); - ROUND256_0_TO_15(c,d,e,f,g,h,a,b); - ROUND256_0_TO_15(b,c,d,e,f,g,h,a); - } while (j < 16); - - /* Now for the remaining rounds to 64: */ - do { - ROUND256(a,b,c,d,e,f,g,h); - ROUND256(h,a,b,c,d,e,f,g); - ROUND256(g,h,a,b,c,d,e,f); - ROUND256(f,g,h,a,b,c,d,e); - ROUND256(e,f,g,h,a,b,c,d); - ROUND256(d,e,f,g,h,a,b,c); - ROUND256(c,d,e,f,g,h,a,b); - ROUND256(b,c,d,e,f,g,h,a); - } while (j < 64); - - /* Compute the current intermediate hash value */ - context->state[0] += a; - context->state[1] += b; - context->state[2] += c; - context->state[3] += d; - context->state[4] += e; - context->state[5] += f; - context->state[6] += g; - context->state[7] += h; - - /* Clean up */ - a = b = c = d = e = f = g = h = T1 = 0; -} - -#else /* SHA2_UNROLL_TRANSFORM */ - -static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { - sha2_word32 a, b, c, d, e, f, g, h, s0, s1; - sha2_word32 T1, T2, *W256; - int j; - - W256 = (sha2_word32*)context->buffer; - - /* Initialize registers with the prev. intermediate value */ - a = context->state[0]; - b = context->state[1]; - c = context->state[2]; - d = context->state[3]; - e = context->state[4]; - f = context->state[5]; - g = context->state[6]; - h = context->state[7]; - - j = 0; - do { -#if BYTE_ORDER == LITTLE_ENDIAN - /* Copy data while converting to host byte order */ - REVERSE32(*data++,W256[j]); - /* Apply the SHA-256 compression function to update a..h */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; -#else /* BYTE_ORDER == LITTLE_ENDIAN */ - /* Apply the SHA-256 compression function to update a..h with copy */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++); -#endif /* BYTE_ORDER == LITTLE_ENDIAN */ - T2 = Sigma0_256(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - - j++; - } while (j < 16); - - do { - /* Part of the message block expansion: */ - s0 = W256[(j+1)&0x0f]; - s0 = sigma0_256(s0); - s1 = W256[(j+14)&0x0f]; - s1 = sigma1_256(s1); - - /* Apply the SHA-256 compression function to update a..h */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + - (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); - T2 = Sigma0_256(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - - j++; - } while (j < 64); - - /* Compute the current intermediate hash value */ - context->state[0] += a; - context->state[1] += b; - context->state[2] += c; - context->state[3] += d; - context->state[4] += e; - context->state[5] += f; - context->state[6] += g; - context->state[7] += h; - - /* Clean up */ - a = b = c = d = e = f = g = h = T1 = T2 = 0; -} - -#endif /* SHA2_UNROLL_TRANSFORM */ - -void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) { - unsigned int freespace, usedspace; - - if (len == 0) { - /* Calling with no data is valid - we do nothing */ - return; - } - - /* Sanity check: */ - assert(context != (SHA256_CTX*)0 && data != (sha2_byte*)0); - - usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; - if (usedspace > 0) { - /* Calculate how much free space is available in the buffer */ - freespace = SHA256_BLOCK_LENGTH - usedspace; - - if (len >= freespace) { - /* Fill the buffer completely and process it */ - bcopy(data, &context->buffer[usedspace], freespace); - context->bitcount += freespace << 3; - len -= freespace; - data += freespace; - SHA256_Transform(context, (sha2_word32*)context->buffer); - } else { - /* The buffer is not yet full */ - bcopy(data, &context->buffer[usedspace], len); - context->bitcount += len << 3; - /* Clean up: */ - usedspace = freespace = 0; - return; - } - } - while (len >= SHA256_BLOCK_LENGTH) { - /* Process as many complete blocks as we can */ - SHA256_Transform(context, (const sha2_word32*)data); - context->bitcount += SHA256_BLOCK_LENGTH << 3; - len -= SHA256_BLOCK_LENGTH; - data += SHA256_BLOCK_LENGTH; - } - if (len > 0) { - /* There's left-overs, so save 'em */ - bcopy(data, context->buffer, len); - context->bitcount += len << 3; - } - /* Clean up: */ - usedspace = freespace = 0; -} - -void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) { - sha2_word32 *d = (sha2_word32*)digest; - unsigned int usedspace; - - /* Sanity check: */ - assert(context != (SHA256_CTX*)0); - - /* If no digest buffer is passed, we don't bother doing this: */ - if (digest != (sha2_byte*)0) { - usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; -#if BYTE_ORDER == LITTLE_ENDIAN - /* Convert FROM host byte order */ - REVERSE64(context->bitcount,context->bitcount); -#endif - if (usedspace > 0) { - /* Begin padding with a 1 bit: */ - context->buffer[usedspace++] = 0x80; - - if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { - /* Set-up for the last transform: */ - bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace); - } else { - if (usedspace < SHA256_BLOCK_LENGTH) { - bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace); - } - /* Do second-to-last transform: */ - SHA256_Transform(context, (sha2_word32*)context->buffer); - - /* And set-up for the last transform: */ - bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); - } - } else { - /* Set-up for the last transform: */ - bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); - - /* Begin padding with a 1 bit: */ - *context->buffer = 0x80; - } - /* Set the bit count: */ - *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount; - - /* Final transform: */ - SHA256_Transform(context, (sha2_word32*)context->buffer); - -#if BYTE_ORDER == LITTLE_ENDIAN - { - /* Convert TO host byte order */ - int j; - for (j = 0; j < 8; j++) { - REVERSE32(context->state[j],context->state[j]); - *d++ = context->state[j]; - } - } -#else - bcopy(context->state, d, SHA256_DIGEST_LENGTH); -#endif - } - - /* Clean up state data: */ - bzero(context, sizeof(*context)); - usedspace = 0; -} - char *SHA256_End(SHA256_CTX* context, char buffer[]) { sha2_byte digest[SHA256_DIGEST_LENGTH], *d = digest; int i; @@ -641,7 +295,7 @@ char *SHA256_End(SHA256_CTX* context, char buffer[]) { return buffer; } -char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) { +char* SHA256_Data(const void *data, unsigned int len, char *digest) { SHA256_CTX context; SHA256_Init(&context); diff --git a/sys/crypto/sha2/sha2.h b/sys/crypto/sha2/sha2.h index 639d58b..59e91f6 100644 --- a/sys/crypto/sha2/sha2.h +++ b/sys/crypto/sha2/sha2.h @@ -56,70 +56,17 @@ extern "C" { #define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) -/*** SHA-256/384/512 Context Structures *******************************/ -/* NOTE: If your architecture does not define either u_intXX_t types or - * uintXX_t (from inttypes.h), you may need to define things by hand - * for your system: - */ -#if 0 -typedef unsigned char u_int8_t; /* 1-byte (8-bits) */ -typedef unsigned int u_int32_t; /* 4-bytes (32-bits) */ -typedef unsigned long long u_int64_t; /* 8-bytes (64-bits) */ -#endif -/* - * Most BSD systems already define u_intXX_t types, as does Linux. - * Some systems, however, like Compaq's Tru64 Unix instead can use - * uintXX_t types defined by very recent ANSI C standards and included - * in the file: - * - * #include <inttypes.h> - * - * If you choose to use <inttypes.h> then please define: - * - * #define SHA2_USE_INTTYPES_H - * - * Or on the command line during compile: - * - * cc -DSHA2_USE_INTTYPES_H ... - */ -#if 0 /*def SHA2_USE_INTTYPES_H*/ - -typedef struct _SHA256_CTX { - uint32_t state[8]; - uint64_t bitcount; - uint8_t buffer[SHA256_BLOCK_LENGTH]; -} SHA256_CTX; +/*** SHA-384/512 Context Structures *******************************/ typedef struct _SHA512_CTX { uint64_t state[8]; uint64_t bitcount[2]; uint8_t buffer[SHA512_BLOCK_LENGTH]; } SHA512_CTX; -#else /* SHA2_USE_INTTYPES_H */ - -typedef struct _SHA256_CTX { - u_int32_t state[8]; - u_int64_t bitcount; - u_int8_t buffer[SHA256_BLOCK_LENGTH]; -} SHA256_CTX; -typedef struct _SHA512_CTX { - u_int64_t state[8]; - u_int64_t bitcount[2]; - u_int8_t buffer[SHA512_BLOCK_LENGTH]; -} SHA512_CTX; - -#endif /* SHA2_USE_INTTYPES_H */ - typedef SHA512_CTX SHA384_CTX; -/*** SHA-256/384/512 Function Prototypes ******************************/ - -void SHA256_Init(SHA256_CTX *); -void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t); -void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); -char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); -char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); +/*** SHA-384/512 Function Prototypes ******************************/ void SHA384_Init(SHA384_CTX*); void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t); @@ -137,4 +84,6 @@ char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); } #endif /* __cplusplus */ +#include "sha256.h" + #endif /* __SHA2_H__ */ diff --git a/sys/crypto/sha2/sha256.h b/sys/crypto/sha2/sha256.h new file mode 100644 index 0000000..c828379 --- /dev/null +++ b/sys/crypto/sha2/sha256.h @@ -0,0 +1,86 @@ +/*- + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA256_H_ +#define _SHA256_H_ + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +typedef struct SHA256Context { + uint32_t state[8]; + uint64_t count; + uint8_t buf[64]; +} SHA256_CTX; + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA256_Init +#define SHA256_Init _libmd_SHA256_Init +#endif +#ifndef SHA256_Update +#define SHA256_Update _libmd_SHA256_Update +#endif +#ifndef SHA256_Final +#define SHA256_Final _libmd_SHA256_Final +#endif +#ifndef SHA256_End +#define SHA256_End _libmd_SHA256_End +#endif +#ifndef SHA256_File +#define SHA256_File _libmd_SHA256_File +#endif +#ifndef SHA256_FileChunk +#define SHA256_FileChunk _libmd_SHA256_FileChunk +#endif +#ifndef SHA256_Data +#define SHA256_Data _libmd_SHA256_Data +#endif + +#ifndef SHA256_Transform +#define SHA256_Transform _libmd_SHA256_Transform +#endif +#ifndef SHA256_version +#define SHA256_version _libmd_SHA256_version +#endif + +void SHA256_Init(SHA256_CTX *); +void SHA256_Update(SHA256_CTX *, const void *, size_t); +void SHA256_Final(unsigned char [32], SHA256_CTX *); +char *SHA256_End(SHA256_CTX *, char *); +char *SHA256_Data(const void *, unsigned int, char *); +#ifndef _KERNEL +char *SHA256_File(const char *, char *); +char *SHA256_FileChunk(const char *, char *, off_t, off_t); +#endif +__END_DECLS + +#endif /* !_SHA256_H_ */ diff --git a/sys/crypto/sha2/sha256c.c b/sys/crypto/sha2/sha256c.c new file mode 100644 index 0000000..da9b02c --- /dev/null +++ b/sys/crypto/sha2/sha256c.c @@ -0,0 +1,316 @@ +/*- + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/endian.h> +#include <sys/types.h> + +#ifdef _KERNEL +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include "sha256.h" + +#if BYTE_ORDER == BIG_ENDIAN + +/* Copy a vector of big-endian uint32_t into a vector of bytes */ +#define be32enc_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +/* Copy a vector of bytes into a vector of big-endian uint32_t */ +#define be32dec_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +#else /* BYTE_ORDER != BIG_ENDIAN */ + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static void +be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + be32enc(dst + i * 4, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint32_t). Assumes len is a multiple of 4. + */ +static void +be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + dst[i] = be32dec(src + i * 4); +} + +#endif /* BYTE_ORDER != BIG_ENDIAN */ + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, k) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + k) + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t * state, const unsigned char block[64]) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + be32dec_vect(W, block, 64); + for (i = 16; i < 64; i++) + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + RNDr(S, W, 0, 0x428a2f98); + RNDr(S, W, 1, 0x71374491); + RNDr(S, W, 2, 0xb5c0fbcf); + RNDr(S, W, 3, 0xe9b5dba5); + RNDr(S, W, 4, 0x3956c25b); + RNDr(S, W, 5, 0x59f111f1); + RNDr(S, W, 6, 0x923f82a4); + RNDr(S, W, 7, 0xab1c5ed5); + RNDr(S, W, 8, 0xd807aa98); + RNDr(S, W, 9, 0x12835b01); + RNDr(S, W, 10, 0x243185be); + RNDr(S, W, 11, 0x550c7dc3); + RNDr(S, W, 12, 0x72be5d74); + RNDr(S, W, 13, 0x80deb1fe); + RNDr(S, W, 14, 0x9bdc06a7); + RNDr(S, W, 15, 0xc19bf174); + RNDr(S, W, 16, 0xe49b69c1); + RNDr(S, W, 17, 0xefbe4786); + RNDr(S, W, 18, 0x0fc19dc6); + RNDr(S, W, 19, 0x240ca1cc); + RNDr(S, W, 20, 0x2de92c6f); + RNDr(S, W, 21, 0x4a7484aa); + RNDr(S, W, 22, 0x5cb0a9dc); + RNDr(S, W, 23, 0x76f988da); + RNDr(S, W, 24, 0x983e5152); + RNDr(S, W, 25, 0xa831c66d); + RNDr(S, W, 26, 0xb00327c8); + RNDr(S, W, 27, 0xbf597fc7); + RNDr(S, W, 28, 0xc6e00bf3); + RNDr(S, W, 29, 0xd5a79147); + RNDr(S, W, 30, 0x06ca6351); + RNDr(S, W, 31, 0x14292967); + RNDr(S, W, 32, 0x27b70a85); + RNDr(S, W, 33, 0x2e1b2138); + RNDr(S, W, 34, 0x4d2c6dfc); + RNDr(S, W, 35, 0x53380d13); + RNDr(S, W, 36, 0x650a7354); + RNDr(S, W, 37, 0x766a0abb); + RNDr(S, W, 38, 0x81c2c92e); + RNDr(S, W, 39, 0x92722c85); + RNDr(S, W, 40, 0xa2bfe8a1); + RNDr(S, W, 41, 0xa81a664b); + RNDr(S, W, 42, 0xc24b8b70); + RNDr(S, W, 43, 0xc76c51a3); + RNDr(S, W, 44, 0xd192e819); + RNDr(S, W, 45, 0xd6990624); + RNDr(S, W, 46, 0xf40e3585); + RNDr(S, W, 47, 0x106aa070); + RNDr(S, W, 48, 0x19a4c116); + RNDr(S, W, 49, 0x1e376c08); + RNDr(S, W, 50, 0x2748774c); + RNDr(S, W, 51, 0x34b0bcb5); + RNDr(S, W, 52, 0x391c0cb3); + RNDr(S, W, 53, 0x4ed8aa4a); + RNDr(S, W, 54, 0x5b9cca4f); + RNDr(S, W, 55, 0x682e6ff3); + RNDr(S, W, 56, 0x748f82ee); + RNDr(S, W, 57, 0x78a5636f); + RNDr(S, W, 58, 0x84c87814); + RNDr(S, W, 59, 0x8cc70208); + RNDr(S, W, 60, 0x90befffa); + RNDr(S, W, 61, 0xa4506ceb); + RNDr(S, W, 62, 0xbef9a3f7); + RNDr(S, W, 63, 0xc67178f2); + + /* 4. Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +static unsigned char PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx) +{ + unsigned char len[8]; + uint32_t r, plen; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be64enc(len, ctx->count); + + /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ + r = (ctx->count >> 3) & 0x3f; + plen = (r < 56) ? (56 - r) : (120 - r); + SHA256_Update(ctx, PAD, (size_t)plen); + + /* Add the terminating bit-count */ + SHA256_Update(ctx, len, 8); +} + +/* SHA-256 initialization. Begins a SHA-256 operation. */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6A09E667; + ctx->state[1] = 0xBB67AE85; + ctx->state[2] = 0x3C6EF372; + ctx->state[3] = 0xA54FF53A; + ctx->state[4] = 0x510E527F; + ctx->state[5] = 0x9B05688C; + ctx->state[6] = 0x1F83D9AB; + ctx->state[7] = 0x5BE0CD19; +} + +/* Add bytes into the hash */ +void +SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) +{ + uint64_t bitlen; + uint32_t r; + const unsigned char *src = in; + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count >> 3) & 0x3f; + + /* Convert the length into a number of bits */ + bitlen = len << 3; + + /* Update number of bits */ + ctx->count += bitlen; + + /* Handle the case where we don't need to perform any transforms */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, 64 - r); + SHA256_Transform(ctx->state, ctx->buf); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks */ + while (len >= 64) { + SHA256_Transform(ctx->state, src); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-256 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx) +{ + + /* Add padding */ + SHA256_Pad(ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, 32); + + /* Clear the context state */ + memset((void *)ctx, 0, sizeof(*ctx)); +} + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef SHA256_Init +__weak_reference(_libmd_SHA256_Init, SHA256_Init); +#undef SHA256_Update +__weak_reference(_libmd_SHA256_Update, SHA256_Update); +#undef SHA256_Final +__weak_reference(_libmd_SHA256_Final, SHA256_Final); +#undef SHA256_Transform +__weak_reference(_libmd_SHA256_Transform, SHA256_Transform); +#endif diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 9dffc40..c952b63 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -75,7 +75,7 @@ struct padlock_sha_ctx { CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx)); static void padlock_sha_init(struct padlock_sha_ctx *ctx); -static int padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, +static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize); static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); @@ -83,16 +83,16 @@ static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static struct auth_hash padlock_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha1_final }; static struct auth_hash padlock_hmac_sha256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha256_final }; @@ -167,7 +167,7 @@ padlock_sha_init(struct padlock_sha_ctx *ctx) } static int -padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, uint16_t bufsize) +padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize) { if (ctx->psc_size - ctx->psc_offset < bufsize) { |