Diffstat (limited to 'sys/crypto')
-rw-r--r--  sys/crypto/aesni/aesencdec.h             24
-rw-r--r--  sys/crypto/aesni/aesni.c                354
-rw-r--r--  sys/crypto/aesni/aesni.h                 15
-rw-r--r--  sys/crypto/aesni/aesni_ghash.c          804
-rw-r--r--  sys/crypto/aesni/aesni_os.h              33
-rw-r--r--  sys/crypto/aesni/aesni_wrap.c           129
-rw-r--r--  sys/crypto/rijndael/rijndael-api-fst.c   19
-rw-r--r--  sys/crypto/rijndael/rijndael-api-fst.h   18
-rw-r--r--  sys/crypto/sha1.h                         2
-rw-r--r--  sys/crypto/sha2/sha2.c                  348
-rw-r--r--  sys/crypto/sha2/sha2.h                   59
-rw-r--r--  sys/crypto/sha2/sha256.h                 86
-rw-r--r--  sys/crypto/sha2/sha256c.c               316
-rw-r--r--  sys/crypto/via/padlock_hash.c            12
14 files changed, 1710 insertions, 509 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h
index 5e4f128..80951a4 100644
--- a/sys/crypto/aesni/aesencdec.h
+++ b/sys/crypto/aesni/aesencdec.h
@@ -1,5 +1,6 @@
/*-
* Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
+ * Copyright 2015 Netflix, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,6 +28,11 @@
*
*/
+#ifndef _AESENCDEC_H_
+#define _AESENCDEC_H_
+
+#include <crypto/aesni/aesni_os.h>
+
#include <wmmintrin.h>
static inline void
@@ -103,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]);
}
+/* rounds is passed in as the AES round count minus 1 */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
@@ -110,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
int i;
tmp = from ^ keysched[0];
-
- for (i = 0; i < rounds; i++)
+ for (i = 1; i < rounds; i += 2) {
+ tmp = _mm_aesenc_si128(tmp, keysched[i]);
tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);
+ }
- return _mm_aesenclast_si128(tmp, keysched[i + 1]);
+ tmp = _mm_aesenc_si128(tmp, keysched[rounds]);
+ return _mm_aesenclast_si128(tmp, keysched[rounds + 1]);
}
static inline __m128i
@@ -125,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
tmp = from ^ keysched[0];
- for (i = 0; i < rounds; i++)
+ for (i = 1; i < rounds; i += 2) {
+ tmp = _mm_aesdec_si128(tmp, keysched[i]);
tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);
+ }
- return _mm_aesdeclast_si128(tmp, keysched[i + 1]);
+ tmp = _mm_aesdec_si128(tmp, keysched[rounds]);
+ return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]);
}
+
+#endif /* _AESENCDEC_H_ */
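
Note: the unrolled loops above consume two round keys per iteration, and the
leading comment records the convention that callers pass the AES round count
minus one. A minimal usage sketch under that convention (the wrapper name is
hypothetical, not part of the patch):

    #include <crypto/aesni/aesni_os.h>
    #include "aesencdec.h"

    /* AES-128 has 10 rounds and 11 round keys; pass 10 - 1 per the comment. */
    static __m128i
    aes128_encrypt_block(const __m128i keysched[11], __m128i block)
    {
            return (aesni_enc(10 - 1, keysched, block));
    }
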
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
index 7d7a740..aa0a73c 100644
--- a/sys/crypto/aesni/aesni.c
+++ b/sys/crypto/aesni/aesni.c
@@ -1,8 +1,13 @@
/*-
* Copyright (c) 2005-2008 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
+ * Copyright (c) 2014 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -39,16 +44,37 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/bus.h>
#include <sys/uio.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
#include <crypto/aesni/aesni.h>
#include <cryptodev_if.h>
+#include <opencrypto/gmac.h>
+
+static struct mtx_padalign *ctx_mtx;
+static struct fpu_kern_ctx **ctx_fpu;
struct aesni_softc {
+ int dieing;
int32_t cid;
uint32_t sid;
TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions;
struct rwlock lock;
};
+#define AQUIRE_CTX(i, ctx) \
+ do { \
+ (i) = PCPU_GET(cpuid); \
+ mtx_lock(&ctx_mtx[(i)]); \
+ (ctx) = ctx_fpu[(i)]; \
+ } while (0)
+#define RELEASE_CTX(i, ctx) \
+ do { \
+ mtx_unlock(&ctx_mtx[(i)]); \
+ (i) = -1; \
+ (ctx) = NULL; \
+ } while (0)
+
static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri);
static int aesni_freesession(device_t, uint64_t tid);
static void aesni_freesession_locked(struct aesni_softc *sc,
@@ -56,7 +82,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc,
static int aesni_cipher_setup(struct aesni_session *ses,
struct cryptoini *encini);
static int aesni_cipher_process(struct aesni_session *ses,
- struct cryptodesc *enccrd, struct cryptop *crp);
+ struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp);
MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data");
@@ -79,23 +105,45 @@ aesni_probe(device_t dev)
return (EINVAL);
}
- if ((cpu_feature & CPUID_SSE2) == 0) {
- device_printf(dev, "No SSE2 support but AESNI!?!\n");
+ if ((cpu_feature2 & CPUID2_SSE41) == 0) {
+ device_printf(dev, "No SSE4.1 support.\n");
return (EINVAL);
}
- device_set_desc_copy(dev, "AES-CBC,AES-XTS");
+ device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM");
return (0);
}
+static void
+aesni_cleanctx(void)
+{
+ int i;
+
+ /* XXX - no way to return driverid */
+ CPU_FOREACH(i) {
+ if (ctx_fpu[i] != NULL) {
+ mtx_destroy(&ctx_mtx[i]);
+ fpu_kern_free_ctx(ctx_fpu[i]);
+ }
+ ctx_fpu[i] = NULL;
+ }
+ free(ctx_mtx, M_AESNI);
+ ctx_mtx = NULL;
+ free(ctx_fpu, M_AESNI);
+ ctx_fpu = NULL;
+}
+
static int
aesni_attach(device_t dev)
{
struct aesni_softc *sc;
+ int i;
sc = device_get_softc(dev);
+ sc->dieing = 0;
TAILQ_INIT(&sc->sessions);
sc->sid = 1;
+
sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE |
CRYPTOCAP_F_SYNC);
if (sc->cid < 0) {
@@ -103,8 +151,23 @@ aesni_attach(device_t dev)
return (ENOMEM);
}
+ ctx_mtx = malloc(sizeof *ctx_mtx * (mp_maxid + 1), M_AESNI,
+ M_WAITOK|M_ZERO);
+ ctx_fpu = malloc(sizeof *ctx_fpu * (mp_maxid + 1), M_AESNI,
+ M_WAITOK|M_ZERO);
+
+ CPU_FOREACH(i) {
+ ctx_fpu[i] = fpu_kern_alloc_ctx(0);
+ mtx_init(&ctx_mtx[i], "anifpumtx", NULL, MTX_DEF);
+ }
+
rw_init(&sc->lock, "aesni_lock");
crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0);
+ crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0);
crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0);
return (0);
}
@@ -116,6 +179,7 @@ aesni_detach(device_t dev)
struct aesni_session *ses;
sc = device_get_softc(dev);
+
rw_wlock(&sc->lock);
TAILQ_FOREACH(ses, &sc->sessions, next) {
if (ses->used) {
@@ -125,14 +189,18 @@ aesni_detach(device_t dev)
return (EBUSY);
}
}
+ sc->dieing = 1;
while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) {
TAILQ_REMOVE(&sc->sessions, ses, next);
- fpu_kern_free_ctx(ses->fpu_ctx);
free(ses, M_AESNI);
}
rw_wunlock(&sc->lock);
- rw_destroy(&sc->lock);
crypto_unregister_all(sc->cid);
+
+ rw_destroy(&sc->lock);
+
+ aesni_cleanctx();
+
return (0);
}
@@ -144,28 +212,52 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
struct cryptoini *encini;
int error;
- if (sidp == NULL || cri == NULL)
+ if (sidp == NULL || cri == NULL) {
+ CRYPTDEB("no sidp or cri");
return (EINVAL);
+ }
sc = device_get_softc(dev);
+ if (sc->dieing)
+ return (EINVAL);
+
ses = NULL;
encini = NULL;
for (; cri != NULL; cri = cri->cri_next) {
switch (cri->cri_alg) {
case CRYPTO_AES_CBC:
+ case CRYPTO_AES_ICM:
case CRYPTO_AES_XTS:
- if (encini != NULL)
+ case CRYPTO_AES_NIST_GCM_16:
+ if (encini != NULL) {
+ CRYPTDEB("encini already set");
return (EINVAL);
+ }
encini = cri;
break;
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ /*
+ * Nothing to do here; in the future we might cache some
+ * values for GHASH.
+ */
+ break;
default:
+ CRYPTDEB("unhandled algorithm");
return (EINVAL);
}
}
- if (encini == NULL)
+ if (encini == NULL) {
+ CRYPTDEB("no cipher");
return (EINVAL);
+ }
rw_wlock(&sc->lock);
+ if (sc->dieing) {
+ rw_wunlock(&sc->lock);
+ return (EINVAL);
+ }
/*
* Free sessions go first, so if the first session is used, we need to
* allocate a new one.
@@ -177,13 +269,6 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
rw_wunlock(&sc->lock);
return (ENOMEM);
}
- ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL |
- FPU_KERN_NOWAIT);
- if (ses->fpu_ctx == NULL) {
- free(ses, M_AESNI);
- rw_wunlock(&sc->lock);
- return (ENOMEM);
- }
ses->id = sc->sid++;
} else {
TAILQ_REMOVE(&sc->sessions, ses, next);
@@ -195,6 +280,7 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
error = aesni_cipher_setup(ses, encini);
if (error != 0) {
+ CRYPTDEB("setup failed");
rw_wlock(&sc->lock);
aesni_freesession_locked(sc, ses);
rw_wunlock(&sc->lock);
@@ -208,15 +294,14 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
static void
aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses)
{
- struct fpu_kern_ctx *ctx;
uint32_t sid;
+ rw_assert(&sc->lock, RA_WLOCKED);
+
sid = ses->id;
TAILQ_REMOVE(&sc->sessions, ses, next);
- ctx = ses->fpu_ctx;
- bzero(ses, sizeof(*ses));
+ *ses = (struct aesni_session){};
ses->id = sid;
- ses->fpu_ctx = ctx;
TAILQ_INSERT_HEAD(&sc->sessions, ses, next);
}
@@ -248,11 +333,13 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
{
struct aesni_softc *sc = device_get_softc(dev);
struct aesni_session *ses = NULL;
- struct cryptodesc *crd, *enccrd;
- int error;
+ struct cryptodesc *crd, *enccrd, *authcrd;
+ int error, needauth;
error = 0;
enccrd = NULL;
+ authcrd = NULL;
+ needauth = 0;
/* Sanity check. */
if (crp == NULL)
@@ -266,6 +353,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
switch (crd->crd_alg) {
case CRYPTO_AES_CBC:
+ case CRYPTO_AES_ICM:
case CRYPTO_AES_XTS:
if (enccrd != NULL) {
error = EINVAL;
@@ -273,11 +361,41 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
}
enccrd = crd;
break;
+
+ case CRYPTO_AES_NIST_GCM_16:
+ if (enccrd != NULL) {
+ error = EINVAL;
+ goto out;
+ }
+ enccrd = crd;
+ needauth = 1;
+ break;
+
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ if (authcrd != NULL) {
+ error = EINVAL;
+ goto out;
+ }
+ authcrd = crd;
+ needauth = 1;
+ break;
+
default:
- return (EINVAL);
+ error = EINVAL;
+ goto out;
}
}
- if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+
+ if (enccrd == NULL || (needauth && authcrd == NULL)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* CBC & XTS can only handle full blocks for now */
+ if ((enccrd->crd_alg == CRYPTO_AES_CBC || enccrd->crd_alg ==
+ CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
error = EINVAL;
goto out;
}
@@ -293,7 +411,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
goto out;
}
- error = aesni_cipher_process(ses, enccrd, crp);
+ error = aesni_cipher_process(ses, enccrd, authcrd, crp);
if (error != 0)
goto out;
@@ -307,21 +425,26 @@ uint8_t *
aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
int *allocated)
{
+ struct mbuf *m;
struct uio *uio;
struct iovec *iov;
uint8_t *addr;
- if (crp->crp_flags & CRYPTO_F_IMBUF)
- goto alloc;
- else if (crp->crp_flags & CRYPTO_F_IOV) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF) {
+ m = (struct mbuf *)crp->crp_buf;
+ if (m->m_next != NULL)
+ goto alloc;
+ addr = mtod(m, uint8_t *);
+ } else if (crp->crp_flags & CRYPTO_F_IOV) {
uio = (struct uio *)crp->crp_buf;
if (uio->uio_iovcnt != 1)
goto alloc;
iov = uio->uio_iov;
- addr = (u_char *)iov->iov_base + enccrd->crd_skip;
+ addr = (uint8_t *)iov->iov_base;
} else
- addr = (u_char *)crp->crp_buf;
+ addr = (uint8_t *)crp->crp_buf;
*allocated = 0;
+ addr += enccrd->crd_skip;
return (addr);
alloc:
@@ -362,37 +485,75 @@ MODULE_DEPEND(aesni, crypto, 1, 1, 1);
static int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
- struct thread *td;
+ struct fpu_kern_ctx *ctx;
int error;
+ int kt, ctxidx;
+
+ kt = is_fpu_kern_thread(0);
+ if (!kt) {
+ AQUIRE_CTX(ctxidx, ctx);
+ error = fpu_kern_enter(curthread, ctx,
+ FPU_KERN_NORMAL | FPU_KERN_KTHR);
+ if (error != 0)
+ goto out;
+ }
- td = curthread;
- error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
- FPU_KERN_KTHR);
- if (error != 0)
- return (error);
error = aesni_cipher_setup_common(ses, encini->cri_key,
encini->cri_klen);
- fpu_kern_leave(td, ses->fpu_ctx);
+
+ if (!kt) {
+ fpu_kern_leave(curthread, ctx);
+out:
+ RELEASE_CTX(ctxidx, ctx);
+ }
return (error);
}
+/*
+ * authcrd contains the associated data.
+ */
static int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
- struct cryptop *crp)
+ struct cryptodesc *authcrd, struct cryptop *crp)
{
- struct thread *td;
- uint8_t *buf;
- int error, allocated;
+ struct fpu_kern_ctx *ctx;
+ uint8_t iv[AES_BLOCK_LEN];
+ uint8_t tag[GMAC_DIGEST_LEN];
+ uint8_t *buf, *authbuf;
+ int error, allocated, authallocated;
+ int ivlen, encflag;
+ int kt, ctxidx;
+
+ encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT;
+
+ if ((enccrd->crd_alg == CRYPTO_AES_ICM ||
+ enccrd->crd_alg == CRYPTO_AES_NIST_GCM_16) &&
+ (enccrd->crd_flags & CRD_F_IV_EXPLICIT) == 0)
+ return (EINVAL);
buf = aesni_cipher_alloc(enccrd, crp, &allocated);
if (buf == NULL)
return (ENOMEM);
- td = curthread;
- error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL |
- FPU_KERN_KTHR);
- if (error != 0)
- goto out1;
+ error = 0;
+ authbuf = NULL;
+ authallocated = 0;
+ if (authcrd != NULL) {
+ authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated);
+ if (authbuf == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ kt = is_fpu_kern_thread(0);
+ if (!kt) {
+ AQUIRE_CTX(ctxidx, ctx);
+ error = fpu_kern_enter(curthread, ctx,
+ FPU_KERN_NORMAL|FPU_KERN_KTHR);
+ if (error != 0)
+ goto out2;
+ }
if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
error = aesni_cipher_setup_common(ses, enccrd->crd_key,
@@ -401,48 +562,105 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
goto out;
}
- if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
+ /* XXX - validate that enccrd and authcrd have/use same key? */
+ switch (enccrd->crd_alg) {
+ case CRYPTO_AES_CBC:
+ case CRYPTO_AES_ICM:
+ ivlen = AES_BLOCK_LEN;
+ break;
+ case CRYPTO_AES_XTS:
+ ivlen = 8;
+ break;
+ case CRYPTO_AES_NIST_GCM_16:
+ ivlen = 12; /* should support arbitrarily larger IVs */
+ break;
+ }
+
+ /* Setup iv */
+ if (encflag) {
if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
- bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+ bcopy(enccrd->crd_iv, iv, ivlen);
+ else
+ arc4rand(iv, ivlen, 0);
+
if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
crypto_copyback(crp->crp_flags, crp->crp_buf,
- enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
- if (ses->algo == CRYPTO_AES_CBC) {
- aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
- enccrd->crd_len, buf, buf, ses->iv);
- } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
- aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
- ses->xts_schedule, enccrd->crd_len, buf, buf,
- ses->iv);
- }
+ enccrd->crd_inject, ivlen, iv);
} else {
if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
- bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+ bcopy(enccrd->crd_iv, iv, ivlen);
else
crypto_copydata(crp->crp_flags, crp->crp_buf,
- enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
- if (ses->algo == CRYPTO_AES_CBC) {
+ enccrd->crd_inject, ivlen, iv);
+ }
+
+ if (authcrd != NULL && !encflag)
+ crypto_copydata(crp->crp_flags, crp->crp_buf,
+ authcrd->crd_inject, GMAC_DIGEST_LEN, tag);
+ else
+ bzero(tag, sizeof tag);
+
+ /* Do work */
+ switch (ses->algo) {
+ case CRYPTO_AES_CBC:
+ if (encflag)
+ aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
+ enccrd->crd_len, buf, buf, iv);
+ else
aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
- enccrd->crd_len, buf, ses->iv);
- } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+ enccrd->crd_len, buf, iv);
+ break;
+ case CRYPTO_AES_ICM:
+ /* encryption & decryption are the same */
+ aesni_encrypt_icm(ses->rounds, ses->enc_schedule,
+ enccrd->crd_len, buf, buf, iv);
+ break;
+ case CRYPTO_AES_XTS:
+ if (encflag)
+ aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
+ ses->xts_schedule, enccrd->crd_len, buf, buf,
+ iv);
+ else
aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
ses->xts_schedule, enccrd->crd_len, buf, buf,
- ses->iv);
+ iv);
+ break;
+ case CRYPTO_AES_NIST_GCM_16:
+ if (encflag)
+ AES_GCM_encrypt(buf, buf, authbuf, iv, tag,
+ enccrd->crd_len, authcrd->crd_len, ivlen,
+ ses->enc_schedule, ses->rounds);
+ else {
+ if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag,
+ enccrd->crd_len, authcrd->crd_len, ivlen,
+ ses->enc_schedule, ses->rounds))
+ error = EBADMSG;
}
+ break;
}
+
if (allocated)
crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
enccrd->crd_len, buf);
- if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
- crypto_copydata(crp->crp_flags, crp->crp_buf,
- enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
- AES_BLOCK_LEN, ses->iv);
+
+ if (!error && authcrd != NULL) {
+ crypto_copyback(crp->crp_flags, crp->crp_buf,
+ authcrd->crd_inject, GMAC_DIGEST_LEN, tag);
+ }
+
out:
- fpu_kern_leave(td, ses->fpu_ctx);
+ if (!kt) {
+ fpu_kern_leave(curthread, ctx);
+out2:
+ RELEASE_CTX(ctxidx, ctx);
+ }
+
out1:
if (allocated) {
bzero(buf, enccrd->crd_len);
free(buf, M_AESNI);
}
+ if (authallocated)
+ free(authbuf, M_AESNI);
return (error);
}
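
Note: sessions no longer own an FPU context; the driver now keeps one context
per CPU, each guarded by a padded mutex, and both the setup and process paths
borrow the current CPU's slot. The pattern, condensed from the two call sites
above:

    kt = is_fpu_kern_thread(0);
    if (!kt) {
            AQUIRE_CTX(ctxidx, ctx);        /* PCPU_GET(cpuid) + per-slot mutex */
            error = fpu_kern_enter(curthread, ctx,
                FPU_KERN_NORMAL | FPU_KERN_KTHR);
            if (error != 0)
                    goto out;
    }
    /* ... AES-NI/SSE work ... */
    if (!kt) {
            fpu_kern_leave(curthread, ctx);
    out:
            RELEASE_CTX(ctxidx, ctx);
    }

The mutex, not the cpuid snapshot, provides the exclusion: if the thread
migrates between PCPU_GET() and mtx_lock(), it simply serializes on (and
safely uses) another CPU's slot.
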
diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h
index ff1d1a2..3d5adec 100644
--- a/sys/crypto/aesni/aesni.h
+++ b/sys/crypto/aesni/aesni.h
@@ -56,7 +56,6 @@ struct aesni_session {
uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16);
uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16);
uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16);
- uint8_t iv[AES_BLOCK_LEN];
int algo;
int rounds;
/* uint8_t *ses_ictx; */
@@ -65,7 +64,6 @@ struct aesni_session {
int used;
uint32_t id;
TAILQ_ENTRY(aesni_session) next;
- struct fpu_kern_ctx *fpu_ctx;
};
/*
@@ -88,6 +86,9 @@ void aesni_encrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/,
size_t len, const uint8_t *from, uint8_t *to);
void aesni_decrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/,
size_t len, const uint8_t *from, uint8_t *to);
+void aesni_encrypt_icm(int rounds, const void *key_schedule /*__aligned(16)*/,
+ size_t len, const uint8_t *from, uint8_t *to,
+ const uint8_t iv[AES_BLOCK_LEN]);
void aesni_encrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
const void *tweak_schedule /*__aligned(16)*/, size_t len,
@@ -96,6 +97,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
const void *tweak_schedule /*__aligned(16)*/, size_t len,
const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
+/* GCM & GHASH functions */
+void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes,
+ const unsigned char *key, int nr);
+int AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes,
+ const unsigned char *key, int nr);
+
int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
int keylen);
uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
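
Note: the GCM entry points take the expanded encryption key schedule, not the
raw key; nbytes, abytes and ibytes are the payload, AAD and IV lengths in
bytes, and nr is the AES round count. A call sketch matching how aesni.c
invokes it (buffer names are hypothetical):

    /* seal len bytes of buf in place, authenticating alen bytes of aad */
    AES_GCM_encrypt(buf, buf, aad, iv, tag, len, alen, 12 /* IV bytes */,
        ses->enc_schedule, ses->rounds);
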
diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c
new file mode 100644
index 0000000..54c8815
--- /dev/null
+++ b/sys/crypto/aesni/aesni_ghash.c
@@ -0,0 +1,804 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * $FreeBSD$
+ *
+ */
+
+/*
+ * Figures 5, 8 and 12 are copied from the Intel white paper:
+ * Intel® Carry-Less Multiplication Instruction and its Usage for
+ * Computing the GCM Mode
+ *
+ * and as such are:
+ * Copyright © 2010 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <crypto/aesni/aesni.h>
+#include <crypto/aesni/aesni_os.h>
+#else
+#include <stdint.h>
+#endif
+
+#include <wmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+static inline int
+m128icmp(__m128i a, __m128i b)
+{
+ __m128i cmp;
+
+ cmp = _mm_cmpeq_epi32(a, b);
+
+ return _mm_movemask_epi8(cmp) == 0xffff;
+}
+
+#ifdef __i386__
+static inline __m128i
+_mm_insert_epi64(__m128i a, int64_t b, const int ndx)
+{
+
+ if (!ndx) {
+ a = _mm_insert_epi32(a, b, 0);
+ a = _mm_insert_epi32(a, b >> 32, 1);
+ } else {
+ a = _mm_insert_epi32(a, b, 2);
+ a = _mm_insert_epi32(a, b >> 32, 3);
+ }
+
+ return a;
+}
+#endif
+
+/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */
+
+/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */
+static void
+gfmul(__m128i a, __m128i b, __m128i *res)
+{
+ __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
+
+ tmp3 = _mm_clmulepi64_si128(a, b, 0x00);
+ tmp4 = _mm_clmulepi64_si128(a, b, 0x10);
+ tmp5 = _mm_clmulepi64_si128(a, b, 0x01);
+ tmp6 = _mm_clmulepi64_si128(a, b, 0x11);
+
+ tmp4 = _mm_xor_si128(tmp4, tmp5);
+ tmp5 = _mm_slli_si128(tmp4, 8);
+ tmp4 = _mm_srli_si128(tmp4, 8);
+ tmp3 = _mm_xor_si128(tmp3, tmp5);
+ tmp6 = _mm_xor_si128(tmp6, tmp4);
+
+ tmp7 = _mm_srli_epi32(tmp3, 31);
+ tmp8 = _mm_srli_epi32(tmp6, 31);
+ tmp3 = _mm_slli_epi32(tmp3, 1);
+ tmp6 = _mm_slli_epi32(tmp6, 1);
+
+ tmp9 = _mm_srli_si128(tmp7, 12);
+ tmp8 = _mm_slli_si128(tmp8, 4);
+ tmp7 = _mm_slli_si128(tmp7, 4);
+ tmp3 = _mm_or_si128(tmp3, tmp7);
+ tmp6 = _mm_or_si128(tmp6, tmp8);
+ tmp6 = _mm_or_si128(tmp6, tmp9);
+
+ tmp7 = _mm_slli_epi32(tmp3, 31);
+ tmp8 = _mm_slli_epi32(tmp3, 30);
+ tmp9 = _mm_slli_epi32(tmp3, 25);
+
+ tmp7 = _mm_xor_si128(tmp7, tmp8);
+ tmp7 = _mm_xor_si128(tmp7, tmp9);
+ tmp8 = _mm_srli_si128(tmp7, 4);
+ tmp7 = _mm_slli_si128(tmp7, 12);
+ tmp3 = _mm_xor_si128(tmp3, tmp7);
+
+ tmp2 = _mm_srli_epi32(tmp3, 1);
+ tmp4 = _mm_srli_epi32(tmp3, 2);
+ tmp5 = _mm_srli_epi32(tmp3, 7);
+ tmp2 = _mm_xor_si128(tmp2, tmp4);
+ tmp2 = _mm_xor_si128(tmp2, tmp5);
+ tmp2 = _mm_xor_si128(tmp2, tmp8);
+ tmp3 = _mm_xor_si128(tmp3, tmp2);
+ tmp6 = _mm_xor_si128(tmp6, tmp3);
+
+ *res = tmp6;
+}
+
+/*
+ * Figure 8. Code Sample - Performing Ghash Using an Aggregated Reduction
+ * Method */
+static void
+reduce4(__m128i H1, __m128i H2, __m128i H3, __m128i H4,
+ __m128i X1, __m128i X2, __m128i X3, __m128i X4, __m128i *res)
+{
+ /* algorithm by Krzysztof Jankowski, Pierre Laurent - Intel */
+ __m128i H1_X1_lo, H1_X1_hi, H2_X2_lo, H2_X2_hi, H3_X3_lo,
+ H3_X3_hi, H4_X4_lo, H4_X4_hi, lo, hi;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i tmp4, tmp5, tmp6, tmp7;
+ __m128i tmp8, tmp9;
+
+ H1_X1_lo = _mm_clmulepi64_si128(H1, X1, 0x00);
+ H2_X2_lo = _mm_clmulepi64_si128(H2, X2, 0x00);
+ H3_X3_lo = _mm_clmulepi64_si128(H3, X3, 0x00);
+ H4_X4_lo = _mm_clmulepi64_si128(H4, X4, 0x00);
+
+ lo = _mm_xor_si128(H1_X1_lo, H2_X2_lo);
+ lo = _mm_xor_si128(lo, H3_X3_lo);
+ lo = _mm_xor_si128(lo, H4_X4_lo);
+
+ H1_X1_hi = _mm_clmulepi64_si128(H1, X1, 0x11);
+ H2_X2_hi = _mm_clmulepi64_si128(H2, X2, 0x11);
+ H3_X3_hi = _mm_clmulepi64_si128(H3, X3, 0x11);
+ H4_X4_hi = _mm_clmulepi64_si128(H4, X4, 0x11);
+
+ hi = _mm_xor_si128(H1_X1_hi, H2_X2_hi);
+ hi = _mm_xor_si128(hi, H3_X3_hi);
+ hi = _mm_xor_si128(hi, H4_X4_hi);
+
+ tmp0 = _mm_shuffle_epi32(H1, 78);
+ tmp4 = _mm_shuffle_epi32(X1, 78);
+ tmp0 = _mm_xor_si128(tmp0, H1);
+ tmp4 = _mm_xor_si128(tmp4, X1);
+ tmp1 = _mm_shuffle_epi32(H2, 78);
+ tmp5 = _mm_shuffle_epi32(X2, 78);
+ tmp1 = _mm_xor_si128(tmp1, H2);
+ tmp5 = _mm_xor_si128(tmp5, X2);
+ tmp2 = _mm_shuffle_epi32(H3, 78);
+ tmp6 = _mm_shuffle_epi32(X3, 78);
+ tmp2 = _mm_xor_si128(tmp2, H3);
+ tmp6 = _mm_xor_si128(tmp6, X3);
+ tmp3 = _mm_shuffle_epi32(H4, 78);
+ tmp7 = _mm_shuffle_epi32(X4, 78);
+ tmp3 = _mm_xor_si128(tmp3, H4);
+ tmp7 = _mm_xor_si128(tmp7, X4);
+
+ tmp0 = _mm_clmulepi64_si128(tmp0, tmp4, 0x00);
+ tmp1 = _mm_clmulepi64_si128(tmp1, tmp5, 0x00);
+ tmp2 = _mm_clmulepi64_si128(tmp2, tmp6, 0x00);
+ tmp3 = _mm_clmulepi64_si128(tmp3, tmp7, 0x00);
+
+ tmp0 = _mm_xor_si128(tmp0, lo);
+ tmp0 = _mm_xor_si128(tmp0, hi);
+ tmp0 = _mm_xor_si128(tmp1, tmp0);
+ tmp0 = _mm_xor_si128(tmp2, tmp0);
+ tmp0 = _mm_xor_si128(tmp3, tmp0);
+
+ tmp4 = _mm_slli_si128(tmp0, 8);
+ tmp0 = _mm_srli_si128(tmp0, 8);
+
+ lo = _mm_xor_si128(tmp4, lo);
+ hi = _mm_xor_si128(tmp0, hi);
+
+ tmp3 = lo;
+ tmp6 = hi;
+
+ tmp7 = _mm_srli_epi32(tmp3, 31);
+ tmp8 = _mm_srli_epi32(tmp6, 31);
+ tmp3 = _mm_slli_epi32(tmp3, 1);
+ tmp6 = _mm_slli_epi32(tmp6, 1);
+
+ tmp9 = _mm_srli_si128(tmp7, 12);
+ tmp8 = _mm_slli_si128(tmp8, 4);
+ tmp7 = _mm_slli_si128(tmp7, 4);
+ tmp3 = _mm_or_si128(tmp3, tmp7);
+ tmp6 = _mm_or_si128(tmp6, tmp8);
+ tmp6 = _mm_or_si128(tmp6, tmp9);
+
+ tmp7 = _mm_slli_epi32(tmp3, 31);
+ tmp8 = _mm_slli_epi32(tmp3, 30);
+ tmp9 = _mm_slli_epi32(tmp3, 25);
+
+ tmp7 = _mm_xor_si128(tmp7, tmp8);
+ tmp7 = _mm_xor_si128(tmp7, tmp9);
+ tmp8 = _mm_srli_si128(tmp7, 4);
+ tmp7 = _mm_slli_si128(tmp7, 12);
+ tmp3 = _mm_xor_si128(tmp3, tmp7);
+
+ tmp2 = _mm_srli_epi32(tmp3, 1);
+ tmp4 = _mm_srli_epi32(tmp3, 2);
+ tmp5 = _mm_srli_epi32(tmp3, 7);
+ tmp2 = _mm_xor_si128(tmp2, tmp4);
+ tmp2 = _mm_xor_si128(tmp2, tmp5);
+ tmp2 = _mm_xor_si128(tmp2, tmp8);
+ tmp3 = _mm_xor_si128(tmp3, tmp2);
+ tmp6 = _mm_xor_si128(tmp6, tmp3);
+
+ *res = tmp6;
+}
+
+/*
+ * Figure 12. AES-GCM: Processing Four Blocks in Parallel with Aggregated
+ * Every Four Blocks
+ */
+/*
+ * Per NIST SP 800-38D, 5.2.1.1, len(P) <= 2^39-256 bits, i.e.
+ * 2^32-2 full blocks (2^36-32 bytes).
+ */
+void
+AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes,
+ const unsigned char *key, int nr)
+{
+ int i, j, k;
+ __m128i tmp1, tmp2, tmp3, tmp4;
+ __m128i tmp5, tmp6, tmp7, tmp8;
+ __m128i H, H2, H3, H4, Y, T;
+ __m128i *KEY = (__m128i*)key;
+ __m128i ctr1, ctr2, ctr3, ctr4;
+ __m128i ctr5, ctr6, ctr7, ctr8;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+ __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0);
+ __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,
+ 7);
+ __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15);
+ __m128i X = _mm_setzero_si128();
+
+ if (ibytes == 96/8) {
+ Y = _mm_loadu_si128((__m128i*)ivec);
+ Y = _mm_insert_epi32(Y, 0x1000000, 3);
+ /* Compute E[ZERO, KS] and E[Y0, KS] together */
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ tmp2 = _mm_xor_si128(Y, KEY[0]);
+ for (j=1; j < nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]);
+
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+
+ H = _mm_shuffle_epi8(H, BSWAP_MASK);
+ } else {
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ for (j=1; j <nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+
+ H = _mm_shuffle_epi8(H, BSWAP_MASK);
+ Y = _mm_setzero_si128();
+
+ for (i=0; i < ibytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ }
+ if (ibytes%16) {
+ for (j=0; j < ibytes%16; j++)
+ ((unsigned char*)&last_block)[j] = ivec[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */
+ tmp1 = _mm_xor_si128(Y, KEY[0]);
+ for (j=1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ }
+
+ gfmul(H,H,&H2);
+ gfmul(H,H2,&H3);
+ gfmul(H,H3,&H4);
+
+ for (i=0; i<abytes/16/4; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]);
+ tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]);
+ tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]);
+ tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]);
+
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+ tmp1 = _mm_xor_si128(X, tmp1);
+
+ reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X);
+ }
+ for (i=i*4; i<abytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X,tmp1);
+ gfmul(X, H, &X);
+ }
+ if (abytes%16) {
+ last_block = _mm_setzero_si128();
+ for (j=0; j<abytes%16; j++)
+ ((unsigned char*)&last_block)[j] = addt[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X,tmp1);
+ gfmul(X,H,&X);
+ }
+
+ ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ ctr2 = _mm_add_epi64(ctr1, ONE);
+ ctr3 = _mm_add_epi64(ctr2, ONE);
+ ctr4 = _mm_add_epi64(ctr3, ONE);
+ ctr5 = _mm_add_epi64(ctr4, ONE);
+ ctr6 = _mm_add_epi64(ctr5, ONE);
+ ctr7 = _mm_add_epi64(ctr6, ONE);
+ ctr8 = _mm_add_epi64(ctr7, ONE);
+
+ for (i=0; i<nbytes/16/8; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+ tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+ tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+ tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
+ tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
+ tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
+ tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
+
+ ctr1 = _mm_add_epi64(ctr1, EIGHT);
+ ctr2 = _mm_add_epi64(ctr2, EIGHT);
+ ctr3 = _mm_add_epi64(ctr3, EIGHT);
+ ctr4 = _mm_add_epi64(ctr4, EIGHT);
+ ctr5 = _mm_add_epi64(ctr5, EIGHT);
+ ctr6 = _mm_add_epi64(ctr6, EIGHT);
+ ctr7 = _mm_add_epi64(ctr7, EIGHT);
+ ctr8 = _mm_add_epi64(ctr8, EIGHT);
+
+ tmp1 =_mm_xor_si128(tmp1, KEY[0]);
+ tmp2 =_mm_xor_si128(tmp2, KEY[0]);
+ tmp3 =_mm_xor_si128(tmp3, KEY[0]);
+ tmp4 =_mm_xor_si128(tmp4, KEY[0]);
+ tmp5 =_mm_xor_si128(tmp5, KEY[0]);
+ tmp6 =_mm_xor_si128(tmp6, KEY[0]);
+ tmp7 =_mm_xor_si128(tmp7, KEY[0]);
+ tmp8 =_mm_xor_si128(tmp8, KEY[0]);
+
+ for (j=1; j<nr; j++) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[j]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[j]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[j]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[j]);
+ }
+ tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]);
+ tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]);
+ tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]);
+ tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]);
+ tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]);
+ tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]);
+ tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]);
+
+ tmp1 = _mm_xor_si128(tmp1,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+0]));
+ tmp2 = _mm_xor_si128(tmp2,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+1]));
+ tmp3 = _mm_xor_si128(tmp3,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+2]));
+ tmp4 = _mm_xor_si128(tmp4,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+3]));
+ tmp5 = _mm_xor_si128(tmp5,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+4]));
+ tmp6 = _mm_xor_si128(tmp6,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+5]));
+ tmp7 = _mm_xor_si128(tmp7,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+6]));
+ tmp8 = _mm_xor_si128(tmp8,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+7]));
+
+ _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8);
+
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK);
+
+ tmp1 = _mm_xor_si128(X, tmp1);
+
+ reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X);
+
+ tmp5 = _mm_xor_si128(X, tmp5);
+ reduce4(H, H2, H3, H4, tmp8, tmp7, tmp6, tmp5, &X);
+ }
+ for (k=i*8; k<nbytes/16; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j=1; j<nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X,H,&X);
+ }
+ /* If an incomplete block remains */
+ if (nbytes%16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j=1; j<nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ last_block = tmp1;
+ for (j=0; j<nbytes%16; j++)
+ out[k*16+j] = ((unsigned char*)&last_block)[j];
+ for ((void)j; j<16; j++)
+ ((unsigned char*)&last_block)[j] = 0;
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X, H, &X);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1);
+
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X,H,&X);
+ X = _mm_shuffle_epi8(X, BSWAP_MASK);
+ T = _mm_xor_si128(X, T);
+ _mm_storeu_si128((__m128i*)tag, T);
+}
+
+/* My modification of _encrypt to be _decrypt */
+int
+AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char *addt, const unsigned char *ivec,
+ const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes,
+ const unsigned char *key, int nr)
+{
+ int i, j, k;
+ __m128i tmp1, tmp2, tmp3, tmp4;
+ __m128i tmp5, tmp6, tmp7, tmp8;
+ __m128i H, H2, H3, H4, Y, T;
+ __m128i *KEY = (__m128i*)key;
+ __m128i ctr1, ctr2, ctr3, ctr4;
+ __m128i ctr5, ctr6, ctr7, ctr8;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i ONE = _mm_set_epi32(0, 1, 0, 0);
+ __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0);
+ __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,
+ 7);
+ __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15);
+ __m128i X = _mm_setzero_si128();
+
+ if (ibytes == 96/8) {
+ Y = _mm_loadu_si128((__m128i*)ivec);
+ Y = _mm_insert_epi32(Y, 0x1000000, 3);
+ /* Compute E[ZERO, KS] and E[Y0, KS] together */
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ tmp2 = _mm_xor_si128(Y, KEY[0]);
+ for (j=1; j < nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]);
+
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ T = _mm_aesenclast_si128(tmp2, KEY[nr]);
+
+ H = _mm_shuffle_epi8(H, BSWAP_MASK);
+ } else {
+ tmp1 = _mm_xor_si128(X, KEY[0]);
+ for (j=1; j <nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ H = _mm_aesenclast_si128(tmp1, KEY[nr]);
+
+ H = _mm_shuffle_epi8(H, BSWAP_MASK);
+ Y = _mm_setzero_si128();
+
+ for (i=0; i < ibytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ }
+ if (ibytes%16) {
+ for (j=0; j < ibytes%16; j++)
+ ((unsigned char*)&last_block)[j] = ivec[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ }
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, 0, 1);
+
+ Y = _mm_xor_si128(Y, tmp1);
+ gfmul(Y, H, &Y);
+ Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */
+ tmp1 = _mm_xor_si128(Y, KEY[0]);
+ for (j=1; j < nr; j++)
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ T = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ }
+
+ gfmul(H,H,&H2);
+ gfmul(H,H2,&H3);
+ gfmul(H,H3,&H4);
+
+ for (i=0; i<abytes/16/4; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]);
+ tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]);
+ tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]);
+ tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]);
+
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+
+ tmp1 = _mm_xor_si128(X, tmp1);
+
+ reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X);
+ }
+ for (i=i*4; i<abytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X,tmp1);
+ gfmul(X, H, &X);
+ }
+ if (abytes%16) {
+ last_block = _mm_setzero_si128();
+ for (j=0; j<abytes%16; j++)
+ ((unsigned char*)&last_block)[j] = addt[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X,tmp1);
+ gfmul(X,H,&X);
+ }
+
+ /* This is where we validate the ciphertext before decrypting */
+ for (i = 0; i<nbytes/16/4; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)in)[i*4]);
+ tmp2 = _mm_loadu_si128(&((__m128i*)in)[i*4+1]);
+ tmp3 = _mm_loadu_si128(&((__m128i*)in)[i*4+2]);
+ tmp4 = _mm_loadu_si128(&((__m128i*)in)[i*4+3]);
+
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+
+ tmp1 = _mm_xor_si128(X, tmp1);
+
+ reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X);
+ }
+ for (i = i*4; i<nbytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)in)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X,H,&X);
+ }
+ if (nbytes%16) {
+ last_block = _mm_setzero_si128();
+ for (j=0; j<nbytes%16; j++)
+ ((unsigned char*)&last_block)[j] = in[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X, H, &X);
+ }
+
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1);
+
+ X = _mm_xor_si128(X, tmp1);
+ gfmul(X,H,&X);
+ X = _mm_shuffle_epi8(X, BSWAP_MASK);
+ T = _mm_xor_si128(X, T);
+
+ if (!m128icmp(T, _mm_loadu_si128((const __m128i*)tag)))
+ return 0; /* authentication failed */
+
+ ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ ctr2 = _mm_add_epi64(ctr1, ONE);
+ ctr3 = _mm_add_epi64(ctr2, ONE);
+ ctr4 = _mm_add_epi64(ctr3, ONE);
+ ctr5 = _mm_add_epi64(ctr4, ONE);
+ ctr6 = _mm_add_epi64(ctr5, ONE);
+ ctr7 = _mm_add_epi64(ctr6, ONE);
+ ctr8 = _mm_add_epi64(ctr7, ONE);
+
+ for (i=0; i<nbytes/16/8; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+ tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+ tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+ tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
+ tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
+ tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
+ tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
+
+ ctr1 = _mm_add_epi64(ctr1, EIGHT);
+ ctr2 = _mm_add_epi64(ctr2, EIGHT);
+ ctr3 = _mm_add_epi64(ctr3, EIGHT);
+ ctr4 = _mm_add_epi64(ctr4, EIGHT);
+ ctr5 = _mm_add_epi64(ctr5, EIGHT);
+ ctr6 = _mm_add_epi64(ctr6, EIGHT);
+ ctr7 = _mm_add_epi64(ctr7, EIGHT);
+ ctr8 = _mm_add_epi64(ctr8, EIGHT);
+
+ tmp1 =_mm_xor_si128(tmp1, KEY[0]);
+ tmp2 =_mm_xor_si128(tmp2, KEY[0]);
+ tmp3 =_mm_xor_si128(tmp3, KEY[0]);
+ tmp4 =_mm_xor_si128(tmp4, KEY[0]);
+ tmp5 =_mm_xor_si128(tmp5, KEY[0]);
+ tmp6 =_mm_xor_si128(tmp6, KEY[0]);
+ tmp7 =_mm_xor_si128(tmp7, KEY[0]);
+ tmp8 =_mm_xor_si128(tmp8, KEY[0]);
+
+ for (j=1; j<nr; j++) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[j]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[j]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[j]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[j]);
+ }
+ tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]);
+ tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]);
+ tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]);
+ tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]);
+ tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]);
+ tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]);
+ tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]);
+
+ tmp1 = _mm_xor_si128(tmp1,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+0]));
+ tmp2 = _mm_xor_si128(tmp2,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+1]));
+ tmp3 = _mm_xor_si128(tmp3,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+2]));
+ tmp4 = _mm_xor_si128(tmp4,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+3]));
+ tmp5 = _mm_xor_si128(tmp5,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+4]));
+ tmp6 = _mm_xor_si128(tmp6,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+5]));
+ tmp7 = _mm_xor_si128(tmp7,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+6]));
+ tmp8 = _mm_xor_si128(tmp8,
+ _mm_loadu_si128(&((__m128i*)in)[i*8+7]));
+
+ _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7);
+ _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8);
+
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK);
+ }
+ for (k=i*8; k<nbytes/16; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi64(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j=1; j<nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ }
+ /* If an incomplete block remains */
+ if (nbytes%16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ for (j=1; j<nr-1; j+=2) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
+ }
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
+ tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ last_block = tmp1;
+ for (j=0; j<nbytes%16; j++)
+ out[k*16+j] = ((unsigned char*)&last_block)[j];
+ }
+ return 1; /* success */
+}
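
Note: gfmul() and reduce4() compute the GF(2^128) product with PCLMULQDQ in a
byte-reversed representation. For unit-testing the driver's output, a plain-C
reference of the abstract GHASH multiply per NIST SP 800-38D (Algorithm 1,
operating on big-endian 16-byte blocks) can be useful; this sketch is
test-only and does not share gfmul()'s internal byte order:

    #include <stdint.h>
    #include <string.h>

    static void
    ghash_mul_ref(const uint8_t X[16], const uint8_t Y[16], uint8_t Z[16])
    {
            uint8_t V[16], acc[16];
            int i, j, lsb;

            memcpy(V, Y, 16);
            memset(acc, 0, 16);
            for (i = 0; i < 128; i++) {
                    if (X[i / 8] & (0x80 >> (i % 8)))   /* bit i of X, MSB first */
                            for (j = 0; j < 16; j++)
                                    acc[j] ^= V[j];
                    lsb = V[15] & 1;
                    for (j = 15; j > 0; j--)            /* V >>= 1, big-endian */
                            V[j] = (V[j] >> 1) | (V[j - 1] << 7);
                    V[0] >>= 1;
                    if (lsb)
                            V[0] ^= 0xe1;   /* R = 11100001 || 0^120 */
            }
            memcpy(Z, acc, 16);
    }
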
diff --git a/sys/crypto/aesni/aesni_os.h b/sys/crypto/aesni/aesni_os.h
new file mode 100644
index 0000000..273c3f9
--- /dev/null
+++ b/sys/crypto/aesni/aesni_os.h
@@ -0,0 +1,33 @@
+/*-
+ * Copyright 2015 Craig Rodrigues <rodrigc@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#if defined(__GNUC__) && defined(_KERNEL)
+/* Suppress inclusion of gcc's mm_malloc.h header */
+#define _MM_MALLOC_H_INCLUDED 1
+#endif
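
Note: gcc's <wmmintrin.h> include chain ends up pulling in mm_malloc.h, which
includes <stdlib.h>, a header that does not exist in the kernel. Pre-defining
the guard makes the intrinsics headers safe for kernel code, so the intended
include order is simply:

    #include <crypto/aesni/aesni_os.h>  /* must come before any *mmintrin.h */
    #include <wmmintrin.h>
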
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 39819a6..e5e2a69 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -3,8 +3,13 @@
* Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
* Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
+ * Copyright (c) 2014 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -29,15 +34,18 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-
+
#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>
-
+
+#include <opencrypto/gmac.h>
+
#include "aesencdec.h"
+#include <smmintrin.h>
MALLOC_DECLARE(M_AESNI);
@@ -176,6 +184,104 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
}
}
+/*
+ * Mixed-endian increment: the low 64 bits are stored in the high word so
+ * the result is compatible with _icm's BSWAP.
+ */
+static inline __m128i
+nextc(__m128i x)
+{
+ const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0);
+ const __m128i ZERO = _mm_setzero_si128();
+
+ x = _mm_add_epi64(x, ONE);
+ __m128i t = _mm_cmpeq_epi64(x, ZERO);
+ t = _mm_unpackhi_epi64(t, ZERO);
+ x = _mm_sub_epi64(x, t);
+
+ return x;
+}
+
+void
+aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
+ const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
+{
+ __m128i tot;
+ __m128i tmp1, tmp2, tmp3, tmp4;
+ __m128i tmp5, tmp6, tmp7, tmp8;
+ __m128i ctr1, ctr2, ctr3, ctr4;
+ __m128i ctr5, ctr6, ctr7, ctr8;
+ __m128i BSWAP_EPI64;
+ __m128i tout[8];
+ struct blocks8 *top;
+ const struct blocks8 *blks;
+ size_t i, cnt;
+
+ BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
+
+ ctr1 = _mm_loadu_si128((__m128i*)iv);
+ ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+
+ cnt = len / AES_BLOCK_LEN / 8;
+ for (i = 0; i < cnt; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr2 = nextc(ctr1);
+ tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
+ ctr3 = nextc(ctr2);
+ tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
+ ctr4 = nextc(ctr3);
+ tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
+ ctr5 = nextc(ctr4);
+ tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
+ ctr6 = nextc(ctr5);
+ tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
+ ctr7 = nextc(ctr6);
+ tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
+ ctr8 = nextc(ctr7);
+ tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
+ ctr1 = nextc(ctr8);
+
+ blks = (const struct blocks8 *)from;
+ top = (struct blocks8 *)to;
+ aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
+ tmp5, tmp6, tmp7, tmp8, tout);
+
+ top->blk[0] = blks->blk[0] ^ tout[0];
+ top->blk[1] = blks->blk[1] ^ tout[1];
+ top->blk[2] = blks->blk[2] ^ tout[2];
+ top->blk[3] = blks->blk[3] ^ tout[3];
+ top->blk[4] = blks->blk[4] ^ tout[4];
+ top->blk[5] = blks->blk[5] ^ tout[5];
+ top->blk[6] = blks->blk[6] ^ tout[6];
+ top->blk[7] = blks->blk[7] ^ tout[7];
+
+ from += AES_BLOCK_LEN * 8;
+ to += AES_BLOCK_LEN * 8;
+ }
+ i *= 8;
+ cnt = len / AES_BLOCK_LEN;
+ for (; i < cnt; i++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = nextc(ctr1);
+
+ tot = aesni_enc(rounds - 1, key_schedule, tmp1);
+
+ tot = tot ^ _mm_loadu_si128((const __m128i *)from);
+ _mm_storeu_si128((__m128i *)to, tot);
+
+ from += AES_BLOCK_LEN;
+ to += AES_BLOCK_LEN;
+ }
+
+ /* handle remaining partial block */
+ if (len % AES_BLOCK_LEN != 0) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tot = aesni_enc(rounds - 1, key_schedule, tmp1);
+ tot = tot ^ _mm_loadu_si128((const __m128i *)from);
+ memcpy(to, &tot, len % AES_BLOCK_LEN);
+ }
+}
+
#define AES_XTS_BLOCKSIZE 16
#define AES_XTS_IVSIZE 8
#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
@@ -333,8 +439,15 @@ int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
int keylen)
{
+ int decsched;
+
+ decsched = 1;
switch (ses->algo) {
+ case CRYPTO_AES_ICM:
+ case CRYPTO_AES_NIST_GCM_16:
+ decsched = 0;
+ /* FALLTHROUGH */
case CRYPTO_AES_CBC:
switch (keylen) {
case 128:
@@ -347,6 +460,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
ses->rounds = AES256_ROUNDS;
break;
default:
+ CRYPTDEB("invalid CBC/ICM/GCM key length");
return (EINVAL);
}
break;
@@ -359,6 +473,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
ses->rounds = AES256_ROUNDS;
break;
default:
+ CRYPTDEB("invalid XTS key length");
return (EINVAL);
}
break;
@@ -367,13 +482,13 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
}
aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
- aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
- if (ses->algo == CRYPTO_AES_CBC)
- arc4rand(ses->iv, sizeof(ses->iv), 0);
- else /* if (ses->algo == CRYPTO_AES_XTS) */ {
+ if (decsched)
+ aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
+ ses->rounds);
+
+ if (ses->algo == CRYPTO_AES_XTS)
aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
ses->rounds);
- }
return (0);
}
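
Note: nextc() keeps the counter in the shuffled layout the ICM loop works in,
but its net effect, once shuffled back through BSWAP_EPI64, is a 128-bit
big-endian counter increment with carry. A scalar equivalent on the plain
big-endian block (a reference sketch, not part of the patch):

    static void
    ctr128_inc_be(uint8_t ctr[16])
    {
            int i;

            for (i = 15; i >= 0; i--)
                    if (++ctr[i] != 0)      /* stop once a byte does not wrap */
                            break;
    }
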
diff --git a/sys/crypto/rijndael/rijndael-api-fst.c b/sys/crypto/rijndael/rijndael-api-fst.c
index 187177b..bf7b4d1 100644
--- a/sys/crypto/rijndael/rijndael-api-fst.c
+++ b/sys/crypto/rijndael/rijndael-api-fst.c
@@ -34,7 +34,8 @@ __FBSDID("$FreeBSD$");
typedef u_int8_t BYTE;
-int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) {
+int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen,
+ const char *keyMaterial) {
u_int8_t cipherKey[RIJNDAEL_MAXKB];
if (key == NULL) {
@@ -83,7 +84,7 @@ int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) {
}
int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputLen, BYTE *outBuffer) {
+ const BYTE *input, int inputLen, BYTE *outBuffer) {
int i, k, numBlocks;
u_int8_t block[16], iv[4][4];
@@ -198,7 +199,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
* @return length in octets (not bits) of the encrypted output buffer.
*/
int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputOctets, BYTE *outBuffer) {
+ const BYTE *input, int inputOctets, BYTE *outBuffer) {
int i, numBlocks, padLen;
u_int8_t block[16], *iv, *cp;
@@ -232,10 +233,10 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
case MODE_CBC:
iv = cipher->IV;
for (i = numBlocks; i > 0; i--) {
- ((u_int32_t*)block)[0] = ((u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0];
- ((u_int32_t*)block)[1] = ((u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1];
- ((u_int32_t*)block)[2] = ((u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2];
- ((u_int32_t*)block)[3] = ((u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3];
+ ((u_int32_t*)block)[0] = ((const u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0];
+ ((u_int32_t*)block)[1] = ((const u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1];
+ ((u_int32_t*)block)[2] = ((const u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2];
+ ((u_int32_t*)block)[3] = ((const u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3];
rijndaelEncrypt(key->rk, key->Nr, block, outBuffer);
iv = outBuffer;
input += 16;
@@ -261,7 +262,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
}
int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputLen, BYTE *outBuffer) {
+ const BYTE *input, int inputLen, BYTE *outBuffer) {
int i, k, numBlocks;
u_int8_t block[16], iv[4][4];
@@ -360,7 +361,7 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key,
}
int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputOctets, BYTE *outBuffer) {
+ const BYTE *input, int inputOctets, BYTE *outBuffer) {
int i, numBlocks, padLen;
u_int8_t block[16];
u_int32_t iv[4];
diff --git a/sys/crypto/rijndael/rijndael-api-fst.h b/sys/crypto/rijndael/rijndael-api-fst.h
index 122bf52..e5f596a 100644
--- a/sys/crypto/rijndael/rijndael-api-fst.h
+++ b/sys/crypto/rijndael/rijndael-api-fst.h
@@ -56,18 +56,18 @@ typedef struct { /* changed order of the components */
/* Function prototypes */
-int rijndael_makeKey(keyInstance *, u_int8_t, int, char *);
+int rijndael_makeKey(keyInstance *, u_int8_t, int, const char *);
int rijndael_cipherInit(cipherInstance *, u_int8_t, char *);
-int rijndael_blockEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
-int rijndael_padEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
+int rijndael_blockEncrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
+int rijndael_padEncrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
-int rijndael_blockDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
-int rijndael_padDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
+int rijndael_blockDecrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
+int rijndael_padDecrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
#endif /* __RIJNDAEL_API_FST_H */
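
[Usage note] With the prototypes const-qualified, read-only key material and plaintext now pass without casting away const. A minimal caller sketch follows; the constant names (DIR_ENCRYPT, MODE_CBC) and the bits-not-bytes length convention are assumptions carried over from the reference Rijndael API that this file implements, so treat it as illustrative, not authoritative:

    #include <crypto/rijndael/rijndael-api-fst.h>

    /* Sketch: encrypt one 16-byte block in CBC mode with a 128-bit key. */
    static int
    cbc_encrypt_one_block(const u_int8_t key[16], u_int8_t iv[16],
        const u_int8_t pt[16], u_int8_t ct[16])
    {
    	keyInstance ki;
    	cipherInstance ci;

    	/* const key material now passes without a cast */
    	if (rijndael_makeKey(&ki, DIR_ENCRYPT, 128,
    	    (const char *)key) < 0)
    		return (-1);
    	if (rijndael_cipherInit(&ci, MODE_CBC, (char *)iv) < 0)
    		return (-1);
    	/* inputLen is given in bits; returns bits written or < 0 */
    	return (rijndael_blockEncrypt(&ci, &ki, pt, 128, ct) == 128 ?
    	    0 : -1);
    }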
diff --git a/sys/crypto/sha1.h b/sys/crypto/sha1.h
index 3686d7d..d32aa8a 100644
--- a/sys/crypto/sha1.h
+++ b/sys/crypto/sha1.h
@@ -53,6 +53,7 @@ struct sha1_ctxt {
} m;
u_int8_t count;
};
+typedef struct sha1_ctxt SHA1_CTX;
#ifdef _KERNEL
extern void sha1_init(struct sha1_ctxt *);
@@ -61,7 +62,6 @@ extern void sha1_loop(struct sha1_ctxt *, const u_int8_t *, size_t);
extern void sha1_result(struct sha1_ctxt *, caddr_t);
/* compatibility with other SHA1 source codes */
-typedef struct sha1_ctxt SHA1_CTX;
#define SHA1Init(x) sha1_init((x))
#define SHA1Update(x, y, z) sha1_loop((x), (y), (z))
#define SHA1Final(x, y) sha1_result((y), (x))
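
[Usage note] Hoisting the SHA1_CTX typedef above the #ifdef _KERNEL guard makes the type name visible to every includer, not just kernel code; the compatibility macros are unchanged. A kernel-side sketch (note the swapped argument order hidden by SHA1Final):

    /* Sketch: one-shot SHA-1 of a buffer via the compat macros. */
    static void
    sha1_digest_buf(const u_int8_t *data, size_t datalen, u_int8_t digest[20])
    {
    	SHA1_CTX ctx;		/* typedef now visible outside _KERNEL too */

    	SHA1Init(&ctx);
    	SHA1Update(&ctx, data, datalen);
    	/* digest comes first: the macro expands to sha1_result(&ctx, digest) */
    	SHA1Final((caddr_t)digest, &ctx);
    }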
diff --git a/sys/crypto/sha2/sha2.c b/sys/crypto/sha2/sha2.c
index 66ec6e9..5bb52f3 100644
--- a/sys/crypto/sha2/sha2.c
+++ b/sys/crypto/sha2/sha2.c
@@ -121,20 +121,10 @@ __FBSDID("$FreeBSD$");
* Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t
* types and pointing out recent ANSI C support for uintXX_t in inttypes.h.
*/
-#if 0 /*def SHA2_USE_INTTYPES_H*/
-
typedef uint8_t sha2_byte; /* Exactly 1 byte */
typedef uint32_t sha2_word32; /* Exactly 4 bytes */
typedef uint64_t sha2_word64; /* Exactly 8 bytes */
-#else /* SHA2_USE_INTTYPES_H */
-
-typedef u_int8_t sha2_byte; /* Exactly 1 byte */
-typedef u_int32_t sha2_word32; /* Exactly 4 bytes */
-typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
-
-#endif /* SHA2_USE_INTTYPES_H */
-
/*** SHA-256/384/512 Various Length Definitions ***********************/
/* NOTE: Most of these are in sha2.h */
@@ -183,8 +173,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
*/
/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
#define R(b,x) ((x) >> (b))
-/* 32-bit Rotate-right (used in SHA-256): */
-#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b))))
/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */
#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b))))
@@ -192,12 +180,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
-/* Four of six logical functions used in SHA-256: */
-#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x)))
-#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x)))
-
/* Four of six logical functions used in SHA-384 and SHA-512: */
#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x)))
#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x)))
@@ -210,43 +192,10 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
* only.
*/
static void SHA512_Last(SHA512_CTX*);
-static void SHA256_Transform(SHA256_CTX*, const sha2_word32*);
static void SHA512_Transform(SHA512_CTX*, const sha2_word64*);
/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
-/* Hash constant words K for SHA-256: */
-static const sha2_word32 K256[64] = {
- 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
- 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
- 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
- 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
- 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
- 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
- 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
- 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
- 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
- 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
- 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
- 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
- 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
- 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
- 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
- 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
-};
-
-/* Initial hash value H for SHA-256: */
-static const sha2_word32 sha256_initial_hash_value[8] = {
- 0x6a09e667UL,
- 0xbb67ae85UL,
- 0x3c6ef372UL,
- 0xa54ff53aUL,
- 0x510e527fUL,
- 0x9b05688cUL,
- 0x1f83d9abUL,
- 0x5be0cd19UL
-};
-
/* Hash constant words K for SHA-384 and SHA-512: */
static const sha2_word64 K512[80] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
@@ -323,301 +272,6 @@ static const char *sha2_hex_digits = "0123456789abcdef";
/*** SHA-256: *********************************************************/
-void SHA256_Init(SHA256_CTX* context) {
- if (context == (SHA256_CTX*)0) {
- return;
- }
- bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH);
- bzero(context->buffer, SHA256_BLOCK_LENGTH);
- context->bitcount = 0;
-}
-
-#ifdef SHA2_UNROLL_TRANSFORM
-
-/* Unrolled SHA-256 round macros: */
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-
-#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \
- REVERSE32(*data++, W256[j]); \
- T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
- K256[j] + W256[j]; \
- (d) += T1; \
- (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
- j++
-
-
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \
- T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
- K256[j] + (W256[j] = *data++); \
- (d) += T1; \
- (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
- j++
-
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND256(a,b,c,d,e,f,g,h) \
- s0 = W256[(j+1)&0x0f]; \
- s0 = sigma0_256(s0); \
- s1 = W256[(j+14)&0x0f]; \
- s1 = sigma1_256(s1); \
- T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \
- (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \
- (d) += T1; \
- (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
- j++
-
-static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
- sha2_word32 a, b, c, d, e, f, g, h, s0, s1;
- sha2_word32 T1, *W256;
- int j;
-
- W256 = (sha2_word32*)context->buffer;
-
- /* Initialize registers with the prev. intermediate value */
- a = context->state[0];
- b = context->state[1];
- c = context->state[2];
- d = context->state[3];
- e = context->state[4];
- f = context->state[5];
- g = context->state[6];
- h = context->state[7];
-
- j = 0;
- do {
- /* Rounds 0 to 15 (unrolled): */
- ROUND256_0_TO_15(a,b,c,d,e,f,g,h);
- ROUND256_0_TO_15(h,a,b,c,d,e,f,g);
- ROUND256_0_TO_15(g,h,a,b,c,d,e,f);
- ROUND256_0_TO_15(f,g,h,a,b,c,d,e);
- ROUND256_0_TO_15(e,f,g,h,a,b,c,d);
- ROUND256_0_TO_15(d,e,f,g,h,a,b,c);
- ROUND256_0_TO_15(c,d,e,f,g,h,a,b);
- ROUND256_0_TO_15(b,c,d,e,f,g,h,a);
- } while (j < 16);
-
- /* Now for the remaining rounds to 64: */
- do {
- ROUND256(a,b,c,d,e,f,g,h);
- ROUND256(h,a,b,c,d,e,f,g);
- ROUND256(g,h,a,b,c,d,e,f);
- ROUND256(f,g,h,a,b,c,d,e);
- ROUND256(e,f,g,h,a,b,c,d);
- ROUND256(d,e,f,g,h,a,b,c);
- ROUND256(c,d,e,f,g,h,a,b);
- ROUND256(b,c,d,e,f,g,h,a);
- } while (j < 64);
-
- /* Compute the current intermediate hash value */
- context->state[0] += a;
- context->state[1] += b;
- context->state[2] += c;
- context->state[3] += d;
- context->state[4] += e;
- context->state[5] += f;
- context->state[6] += g;
- context->state[7] += h;
-
- /* Clean up */
- a = b = c = d = e = f = g = h = T1 = 0;
-}
-
-#else /* SHA2_UNROLL_TRANSFORM */
-
-static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
- sha2_word32 a, b, c, d, e, f, g, h, s0, s1;
- sha2_word32 T1, T2, *W256;
- int j;
-
- W256 = (sha2_word32*)context->buffer;
-
- /* Initialize registers with the prev. intermediate value */
- a = context->state[0];
- b = context->state[1];
- c = context->state[2];
- d = context->state[3];
- e = context->state[4];
- f = context->state[5];
- g = context->state[6];
- h = context->state[7];
-
- j = 0;
- do {
-#if BYTE_ORDER == LITTLE_ENDIAN
- /* Copy data while converting to host byte order */
- REVERSE32(*data++,W256[j]);
- /* Apply the SHA-256 compression function to update a..h */
- T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j];
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
- /* Apply the SHA-256 compression function to update a..h with copy */
- T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++);
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
- T2 = Sigma0_256(a) + Maj(a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + T1;
- d = c;
- c = b;
- b = a;
- a = T1 + T2;
-
- j++;
- } while (j < 16);
-
- do {
- /* Part of the message block expansion: */
- s0 = W256[(j+1)&0x0f];
- s0 = sigma0_256(s0);
- s1 = W256[(j+14)&0x0f];
- s1 = sigma1_256(s1);
-
- /* Apply the SHA-256 compression function to update a..h */
- T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] +
- (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0);
- T2 = Sigma0_256(a) + Maj(a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + T1;
- d = c;
- c = b;
- b = a;
- a = T1 + T2;
-
- j++;
- } while (j < 64);
-
- /* Compute the current intermediate hash value */
- context->state[0] += a;
- context->state[1] += b;
- context->state[2] += c;
- context->state[3] += d;
- context->state[4] += e;
- context->state[5] += f;
- context->state[6] += g;
- context->state[7] += h;
-
- /* Clean up */
- a = b = c = d = e = f = g = h = T1 = T2 = 0;
-}
-
-#endif /* SHA2_UNROLL_TRANSFORM */
-
-void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
- unsigned int freespace, usedspace;
-
- if (len == 0) {
- /* Calling with no data is valid - we do nothing */
- return;
- }
-
- /* Sanity check: */
- assert(context != (SHA256_CTX*)0 && data != (sha2_byte*)0);
-
- usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
- if (usedspace > 0) {
- /* Calculate how much free space is available in the buffer */
- freespace = SHA256_BLOCK_LENGTH - usedspace;
-
- if (len >= freespace) {
- /* Fill the buffer completely and process it */
- bcopy(data, &context->buffer[usedspace], freespace);
- context->bitcount += freespace << 3;
- len -= freespace;
- data += freespace;
- SHA256_Transform(context, (sha2_word32*)context->buffer);
- } else {
- /* The buffer is not yet full */
- bcopy(data, &context->buffer[usedspace], len);
- context->bitcount += len << 3;
- /* Clean up: */
- usedspace = freespace = 0;
- return;
- }
- }
- while (len >= SHA256_BLOCK_LENGTH) {
- /* Process as many complete blocks as we can */
- SHA256_Transform(context, (const sha2_word32*)data);
- context->bitcount += SHA256_BLOCK_LENGTH << 3;
- len -= SHA256_BLOCK_LENGTH;
- data += SHA256_BLOCK_LENGTH;
- }
- if (len > 0) {
- /* There's left-overs, so save 'em */
- bcopy(data, context->buffer, len);
- context->bitcount += len << 3;
- }
- /* Clean up: */
- usedspace = freespace = 0;
-}
-
-void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) {
- sha2_word32 *d = (sha2_word32*)digest;
- unsigned int usedspace;
-
- /* Sanity check: */
- assert(context != (SHA256_CTX*)0);
-
- /* If no digest buffer is passed, we don't bother doing this: */
- if (digest != (sha2_byte*)0) {
- usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
-#if BYTE_ORDER == LITTLE_ENDIAN
- /* Convert FROM host byte order */
- REVERSE64(context->bitcount,context->bitcount);
-#endif
- if (usedspace > 0) {
- /* Begin padding with a 1 bit: */
- context->buffer[usedspace++] = 0x80;
-
- if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) {
- /* Set-up for the last transform: */
- bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace);
- } else {
- if (usedspace < SHA256_BLOCK_LENGTH) {
- bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace);
- }
- /* Do second-to-last transform: */
- SHA256_Transform(context, (sha2_word32*)context->buffer);
-
- /* And set-up for the last transform: */
- bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
- }
- } else {
- /* Set-up for the last transform: */
- bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
-
- /* Begin padding with a 1 bit: */
- *context->buffer = 0x80;
- }
- /* Set the bit count: */
- *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount;
-
- /* Final transform: */
- SHA256_Transform(context, (sha2_word32*)context->buffer);
-
-#if BYTE_ORDER == LITTLE_ENDIAN
- {
- /* Convert TO host byte order */
- int j;
- for (j = 0; j < 8; j++) {
- REVERSE32(context->state[j],context->state[j]);
- *d++ = context->state[j];
- }
- }
-#else
- bcopy(context->state, d, SHA256_DIGEST_LENGTH);
-#endif
- }
-
- /* Clean up state data: */
- bzero(context, sizeof(*context));
- usedspace = 0;
-}
-
char *SHA256_End(SHA256_CTX* context, char buffer[]) {
sha2_byte digest[SHA256_DIGEST_LENGTH], *d = digest;
int i;
@@ -641,7 +295,7 @@ char *SHA256_End(SHA256_CTX* context, char buffer[]) {
return buffer;
}
-char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) {
+char* SHA256_Data(const void *data, unsigned int len, char *digest) {
SHA256_CTX context;
SHA256_Init(&context);
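
[Usage note] The relaxed SHA256_Data() signature accepts any buffer type and writes a NUL-terminated hex string into digest. The sketch below assumes SHA256_DIGEST_STRING_LENGTH (2 * 32 + 1 = 65) remains defined in sha2.h alongside the SHA-512 equivalents:

    /* Sketch: one-shot hex digest via the updated prototype. */
    static void
    hex_example(void)
    {
    	char hex[SHA256_DIGEST_STRING_LENGTH];	/* 64 hex digits + NUL */

    	SHA256_Data("abc", 3, hex);
    	/*
    	 * hex now holds the well-known test vector:
    	 * "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    	 */
    }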
diff --git a/sys/crypto/sha2/sha2.h b/sys/crypto/sha2/sha2.h
index 639d58b..59e91f6 100644
--- a/sys/crypto/sha2/sha2.h
+++ b/sys/crypto/sha2/sha2.h
@@ -56,70 +56,17 @@ extern "C" {
#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)
-/*** SHA-256/384/512 Context Structures *******************************/
-/* NOTE: If your architecture does not define either u_intXX_t types or
- * uintXX_t (from inttypes.h), you may need to define things by hand
- * for your system:
- */
-#if 0
-typedef unsigned char u_int8_t; /* 1-byte (8-bits) */
-typedef unsigned int u_int32_t; /* 4-bytes (32-bits) */
-typedef unsigned long long u_int64_t; /* 8-bytes (64-bits) */
-#endif
-/*
- * Most BSD systems already define u_intXX_t types, as does Linux.
- * Some systems, however, like Compaq's Tru64 Unix instead can use
- * uintXX_t types defined by very recent ANSI C standards and included
- * in the file:
- *
- * #include <inttypes.h>
- *
- * If you choose to use <inttypes.h> then please define:
- *
- * #define SHA2_USE_INTTYPES_H
- *
- * Or on the command line during compile:
- *
- * cc -DSHA2_USE_INTTYPES_H ...
- */
-#if 0 /*def SHA2_USE_INTTYPES_H*/
-
-typedef struct _SHA256_CTX {
- uint32_t state[8];
- uint64_t bitcount;
- uint8_t buffer[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
+/*** SHA-384/512 Context Structures *******************************/
typedef struct _SHA512_CTX {
uint64_t state[8];
uint64_t bitcount[2];
uint8_t buffer[SHA512_BLOCK_LENGTH];
} SHA512_CTX;
-#else /* SHA2_USE_INTTYPES_H */
-
-typedef struct _SHA256_CTX {
- u_int32_t state[8];
- u_int64_t bitcount;
- u_int8_t buffer[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
-typedef struct _SHA512_CTX {
- u_int64_t state[8];
- u_int64_t bitcount[2];
- u_int8_t buffer[SHA512_BLOCK_LENGTH];
-} SHA512_CTX;
-
-#endif /* SHA2_USE_INTTYPES_H */
-
typedef SHA512_CTX SHA384_CTX;
-/*** SHA-256/384/512 Function Prototypes ******************************/
-
-void SHA256_Init(SHA256_CTX *);
-void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t);
-void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*);
-char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]);
-char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]);
+/*** SHA-384/512 Function Prototypes ******************************/
void SHA384_Init(SHA384_CTX*);
void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t);
@@ -137,4 +84,6 @@ char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]);
}
#endif /* __cplusplus */
+#include "sha256.h"
+
#endif /* __SHA2_H__ */
diff --git a/sys/crypto/sha2/sha256.h b/sys/crypto/sha2/sha256.h
new file mode 100644
index 0000000..c828379
--- /dev/null
+++ b/sys/crypto/sha2/sha256.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA256_H_
+#define _SHA256_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+typedef struct SHA256Context {
+ uint32_t state[8];
+ uint64_t count;
+ uint8_t buf[64];
+} SHA256_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+
+#ifndef SHA256_Init
+#define SHA256_Init _libmd_SHA256_Init
+#endif
+#ifndef SHA256_Update
+#define SHA256_Update _libmd_SHA256_Update
+#endif
+#ifndef SHA256_Final
+#define SHA256_Final _libmd_SHA256_Final
+#endif
+#ifndef SHA256_End
+#define SHA256_End _libmd_SHA256_End
+#endif
+#ifndef SHA256_File
+#define SHA256_File _libmd_SHA256_File
+#endif
+#ifndef SHA256_FileChunk
+#define SHA256_FileChunk _libmd_SHA256_FileChunk
+#endif
+#ifndef SHA256_Data
+#define SHA256_Data _libmd_SHA256_Data
+#endif
+
+#ifndef SHA256_Transform
+#define SHA256_Transform _libmd_SHA256_Transform
+#endif
+#ifndef SHA256_version
+#define SHA256_version _libmd_SHA256_version
+#endif
+
+void SHA256_Init(SHA256_CTX *);
+void SHA256_Update(SHA256_CTX *, const void *, size_t);
+void SHA256_Final(unsigned char [32], SHA256_CTX *);
+char *SHA256_End(SHA256_CTX *, char *);
+char *SHA256_Data(const void *, unsigned int, char *);
+#ifndef _KERNEL
+char *SHA256_File(const char *, char *);
+char *SHA256_FileChunk(const char *, char *, off_t, off_t);
+#endif
+__END_DECLS
+
+#endif /* !_SHA256_H_ */
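
[Usage note] In-kernel callers keep the plain three-call pattern; the _libmd_ renames are invisible to them because the #defines above rewrite the names at compile time, and the weak references in sha256c.c restore the unprefixed symbols when libmd itself is built. A minimal sketch grounded in the prototypes above:

    #include <crypto/sha2/sha256.h>	/* sha2.h now pulls this in as well */

    /* Sketch: hash an arbitrary buffer into a 32-byte digest. */
    static void
    sha256_digest_buf(const void *buf, size_t len, unsigned char digest[32])
    {
    	SHA256_CTX ctx;

    	SHA256_Init(&ctx);	/* really _libmd_SHA256_Init after the #define */
    	SHA256_Update(&ctx, buf, len);
    	SHA256_Final(digest, &ctx);	/* also zeroes the context state */
    }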
diff --git a/sys/crypto/sha2/sha256c.c b/sys/crypto/sha2/sha256c.c
new file mode 100644
index 0000000..da9b02c
--- /dev/null
+++ b/sys/crypto/sha2/sha256c.c
@@ -0,0 +1,316 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include "sha256.h"
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/* Copy a vector of big-endian uint32_t into a vector of bytes */
+#define be32enc_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+/* Copy a vector of bytes into a vector of big-endian uint32_t */
+#define be32dec_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+#else /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form. Assumes len is a multiple of 4.
+ */
+static void
+be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 4; i++)
+ be32enc(dst + i * 4, src[i]);
+}
+
+/*
+ * Decode a big-endian length len vector of (unsigned char) into a length
+ * len/4 vector of (uint32_t). Assumes len is a multiple of 4.
+ */
+static void
+be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 4; i++)
+ dst[i] = be32dec(src + i * 4);
+}
+
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define SHR(x, n) (x >> n)
+#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
+#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+ t0 = h + S1(e) + Ch(e, f, g) + k; \
+ t1 = S0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1;
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i, k) \
+ RND(S[(64 - i) % 8], S[(65 - i) % 8], \
+ S[(66 - i) % 8], S[(67 - i) % 8], \
+ S[(68 - i) % 8], S[(69 - i) % 8], \
+ S[(70 - i) % 8], S[(71 - i) % 8], \
+ W[i] + k)
+
+/*
+ * SHA256 block compression function. The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ */
+static void
+SHA256_Transform(uint32_t * state, const unsigned char block[64])
+{
+ uint32_t W[64];
+ uint32_t S[8];
+ uint32_t t0, t1;
+ int i;
+
+ /* 1. Prepare message schedule W. */
+ be32dec_vect(W, block, 64);
+ for (i = 16; i < 64; i++)
+ W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+
+ /* 2. Initialize working variables. */
+ memcpy(S, state, 32);
+
+ /* 3. Mix. */
+ RNDr(S, W, 0, 0x428a2f98);
+ RNDr(S, W, 1, 0x71374491);
+ RNDr(S, W, 2, 0xb5c0fbcf);
+ RNDr(S, W, 3, 0xe9b5dba5);
+ RNDr(S, W, 4, 0x3956c25b);
+ RNDr(S, W, 5, 0x59f111f1);
+ RNDr(S, W, 6, 0x923f82a4);
+ RNDr(S, W, 7, 0xab1c5ed5);
+ RNDr(S, W, 8, 0xd807aa98);
+ RNDr(S, W, 9, 0x12835b01);
+ RNDr(S, W, 10, 0x243185be);
+ RNDr(S, W, 11, 0x550c7dc3);
+ RNDr(S, W, 12, 0x72be5d74);
+ RNDr(S, W, 13, 0x80deb1fe);
+ RNDr(S, W, 14, 0x9bdc06a7);
+ RNDr(S, W, 15, 0xc19bf174);
+ RNDr(S, W, 16, 0xe49b69c1);
+ RNDr(S, W, 17, 0xefbe4786);
+ RNDr(S, W, 18, 0x0fc19dc6);
+ RNDr(S, W, 19, 0x240ca1cc);
+ RNDr(S, W, 20, 0x2de92c6f);
+ RNDr(S, W, 21, 0x4a7484aa);
+ RNDr(S, W, 22, 0x5cb0a9dc);
+ RNDr(S, W, 23, 0x76f988da);
+ RNDr(S, W, 24, 0x983e5152);
+ RNDr(S, W, 25, 0xa831c66d);
+ RNDr(S, W, 26, 0xb00327c8);
+ RNDr(S, W, 27, 0xbf597fc7);
+ RNDr(S, W, 28, 0xc6e00bf3);
+ RNDr(S, W, 29, 0xd5a79147);
+ RNDr(S, W, 30, 0x06ca6351);
+ RNDr(S, W, 31, 0x14292967);
+ RNDr(S, W, 32, 0x27b70a85);
+ RNDr(S, W, 33, 0x2e1b2138);
+ RNDr(S, W, 34, 0x4d2c6dfc);
+ RNDr(S, W, 35, 0x53380d13);
+ RNDr(S, W, 36, 0x650a7354);
+ RNDr(S, W, 37, 0x766a0abb);
+ RNDr(S, W, 38, 0x81c2c92e);
+ RNDr(S, W, 39, 0x92722c85);
+ RNDr(S, W, 40, 0xa2bfe8a1);
+ RNDr(S, W, 41, 0xa81a664b);
+ RNDr(S, W, 42, 0xc24b8b70);
+ RNDr(S, W, 43, 0xc76c51a3);
+ RNDr(S, W, 44, 0xd192e819);
+ RNDr(S, W, 45, 0xd6990624);
+ RNDr(S, W, 46, 0xf40e3585);
+ RNDr(S, W, 47, 0x106aa070);
+ RNDr(S, W, 48, 0x19a4c116);
+ RNDr(S, W, 49, 0x1e376c08);
+ RNDr(S, W, 50, 0x2748774c);
+ RNDr(S, W, 51, 0x34b0bcb5);
+ RNDr(S, W, 52, 0x391c0cb3);
+ RNDr(S, W, 53, 0x4ed8aa4a);
+ RNDr(S, W, 54, 0x5b9cca4f);
+ RNDr(S, W, 55, 0x682e6ff3);
+ RNDr(S, W, 56, 0x748f82ee);
+ RNDr(S, W, 57, 0x78a5636f);
+ RNDr(S, W, 58, 0x84c87814);
+ RNDr(S, W, 59, 0x8cc70208);
+ RNDr(S, W, 60, 0x90befffa);
+ RNDr(S, W, 61, 0xa4506ceb);
+ RNDr(S, W, 62, 0xbef9a3f7);
+ RNDr(S, W, 63, 0xc67178f2);
+
+ /* 4. Mix local working variables into global state */
+ for (i = 0; i < 8; i++)
+ state[i] += S[i];
+}
+
+static unsigned char PAD[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Add padding and terminating bit-count. */
+static void
+SHA256_Pad(SHA256_CTX * ctx)
+{
+ unsigned char len[8];
+ uint32_t r, plen;
+
+ /*
+ * Convert length to a vector of bytes -- we do this now rather
+ * than later because the length will change after we pad.
+ */
+ be64enc(len, ctx->count);
+
+ /* Add 1--64 bytes so that the resulting length is 56 mod 64 */
+ r = (ctx->count >> 3) & 0x3f;
+ plen = (r < 56) ? (56 - r) : (120 - r);
+ SHA256_Update(ctx, PAD, (size_t)plen);
+
+ /* Add the terminating bit-count */
+ SHA256_Update(ctx, len, 8);
+}
+
+/* SHA-256 initialization. Begins a SHA-256 operation. */
+void
+SHA256_Init(SHA256_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0x6A09E667;
+ ctx->state[1] = 0xBB67AE85;
+ ctx->state[2] = 0x3C6EF372;
+ ctx->state[3] = 0xA54FF53A;
+ ctx->state[4] = 0x510E527F;
+ ctx->state[5] = 0x9B05688C;
+ ctx->state[6] = 0x1F83D9AB;
+ ctx->state[7] = 0x5BE0CD19;
+}
+
+/* Add bytes into the hash */
+void
+SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
+{
+ uint64_t bitlen;
+ uint32_t r;
+ const unsigned char *src = in;
+
+ /* Number of bytes left in the buffer from previous updates */
+ r = (ctx->count >> 3) & 0x3f;
+
+ /* Convert the length into a number of bits */
+ bitlen = len << 3;
+
+ /* Update number of bits */
+ ctx->count += bitlen;
+
+ /* Handle the case where we don't need to perform any transforms */
+ if (len < 64 - r) {
+ memcpy(&ctx->buf[r], src, len);
+ return;
+ }
+
+ /* Finish the current block */
+ memcpy(&ctx->buf[r], src, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+ src += 64 - r;
+ len -= 64 - r;
+
+ /* Perform complete blocks */
+ while (len >= 64) {
+ SHA256_Transform(ctx->state, src);
+ src += 64;
+ len -= 64;
+ }
+
+ /* Copy left over data into buffer */
+ memcpy(ctx->buf, src, len);
+}
+
+/*
+ * SHA-256 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ */
+void
+SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx)
+{
+
+ /* Add padding */
+ SHA256_Pad(ctx);
+
+ /* Write the hash */
+ be32enc_vect(digest, ctx->state, 32);
+
+ /* Clear the context state */
+ memset((void *)ctx, 0, sizeof(*ctx));
+}
+
+#ifdef WEAK_REFS
+/* When building libmd, provide weak references. Note: this is not
+ activated in the context of compiling these sources for internal
+ use in libcrypt.
+ */
+#undef SHA256_Init
+__weak_reference(_libmd_SHA256_Init, SHA256_Init);
+#undef SHA256_Update
+__weak_reference(_libmd_SHA256_Update, SHA256_Update);
+#undef SHA256_Final
+__weak_reference(_libmd_SHA256_Final, SHA256_Final);
+#undef SHA256_Transform
+__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
+#endif
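
[Usage note] The padding arithmetic in SHA256_Pad() is easy to sanity-check in isolation: for any residual byte count r mod 64, plen lands the buffer on 56 mod 64, and the 8-byte bit count then closes the block exactly. A standalone check (a sketch for the reader, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
    	uint32_t r, plen;

    	for (r = 0; r < 64; r++) {
    		plen = (r < 56) ? (56 - r) : (120 - r);
    		assert(plen >= 1 && plen <= 64);	/* "Add 1--64 bytes" */
    		assert((r + plen) % 64 == 56);		/* 56 mod 64 */
    		assert((r + plen + 8) % 64 == 0);	/* bit count closes block */
    	}
    	return (0);
    }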
diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c
index 9dffc40..c952b63 100644
--- a/sys/crypto/via/padlock_hash.c
+++ b/sys/crypto/via/padlock_hash.c
@@ -75,7 +75,7 @@ struct padlock_sha_ctx {
CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx));
static void padlock_sha_init(struct padlock_sha_ctx *ctx);
-static int padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf,
+static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf,
uint16_t bufsize);
static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx);
static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx);
@@ -83,16 +83,16 @@ static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx);
static struct auth_hash padlock_hmac_sha1 = {
CRYPTO_SHA1_HMAC, "HMAC-SHA1",
20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx),
- (void (*)(void *))padlock_sha_init,
- (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update,
+ (void (*)(void *))padlock_sha_init, NULL, NULL,
+ (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update,
(void (*)(uint8_t *, void *))padlock_sha1_final
};
static struct auth_hash padlock_hmac_sha256 = {
CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256",
32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx),
- (void (*)(void *))padlock_sha_init,
- (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update,
+ (void (*)(void *))padlock_sha_init, NULL, NULL,
+ (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update,
(void (*)(uint8_t *, void *))padlock_sha256_final
};
@@ -167,7 +167,7 @@ padlock_sha_init(struct padlock_sha_ctx *ctx)
}
static int
-padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, uint16_t bufsize)
+padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize)
{
if (ctx->psc_size - ctx->psc_offset < bufsize) {