author	phk <phk@FreeBSD.org>	2002-10-19 17:02:17 +0000
committer	phk <phk@FreeBSD.org>	2002-10-19 17:02:17 +0000
commit	f4a1c1173b3f45e1a14bc1d4c04c14d43383091b (patch)
tree	f7190d834ab37c4346e4a07e080071a331ff485e /sys
parent	b98187d3390cbe9be86f537cbb5de42714f233ce (diff)
Add Geom Based Disk Encryption to the tree.
This is an encryption module designed to secure denial of access to the
contents of "cold disks", with or without destruction activation.

Major features:

 * Based on AES, MD5 and ARC4 algorithms.
 * Four cryptographic barriers:
    1) Pass-phrase encrypts the master key.
    2) Pass-phrase + Lock data locates master key.
    3) 128 bit key derived from 2048 bit master key protects sector key.
    4) 128 bit random single-use sector keys protect data payload.
 * Up to four different changeable pass-phrases.
 * Blackening feature for provable destruction of master key material.
 * Isotropic disk contents offer no information about sector contents.
 * Configurable destination sector range allows steganographic deployment.

This commit adds the kernel part; separate commits will follow for the
userland utility and documentation.

This software was developed for the FreeBSD Project by Poul-Henning Kamp
and NAI Labs, the Security Research Division of Network Associates, Inc.
under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
DARPA CHATS research program.

Many thanks to Robert Watson, CBOSS Principal Investigator, for making
this possible.

Sponsored by:	DARPA & NAI Labs.
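For orientation, the zone geometry set up by the "Initialize helper-fields" block in g_bde_create() (in the diff below) can be worked through with a small standalone sketch; the 512-byte sector size is only an assumption for illustration and the program is not part of this commit:

#include <stdio.h>

#define G_BDE_SKEYLEN	(128 / 8)	/* bytes in one single-use sector key */

int
main(void)
{
	unsigned sectorsize = 512;	/* assumed sector size, for illustration */
	unsigned keys_per_sector, zone_cont, zone_width;

	/* Same arithmetic as the helper-field initialization in g_bde_create(). */
	keys_per_sector = sectorsize / G_BDE_SKEYLEN;	/* 32 sector keys per key sector */
	zone_cont = keys_per_sector * sectorsize;	/* 16384 payload bytes per zone */
	zone_width = zone_cont + sectorsize;		/* 16896 on-disk bytes per zone */

	printf("payload sectors per zone: %u\n", keys_per_sector);
	printf("zone payload bytes:       %u\n", zone_cont);
	printf("zone on-disk bytes:       %u\n", zone_width);
	return (0);
}

Each zone thus consists of its payload sectors plus one key sector holding their single-use sector keys, which is why the provider's mediasize is derived from zone_cont via g_bde_max_sector() rather than taken directly from the underlying provider.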
Diffstat (limited to 'sys')
-rw-r--r--	sys/conf/NOTES	1
-rw-r--r--	sys/conf/files	4
-rw-r--r--	sys/conf/options	1
-rw-r--r--	sys/geom/bde/g_bde.c	282
-rw-r--r--	sys/geom/bde/g_bde.h	150
-rw-r--r--	sys/geom/bde/g_bde_crypt.c	356
-rw-r--r--	sys/geom/bde/g_bde_lock.c	311
-rw-r--r--	sys/geom/bde/g_bde_work.c	731
8 files changed, 1836 insertions, 0 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 81df5c3..02e1d9f 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -115,6 +115,7 @@ options PQ_CACHESIZE=512 # color for 512k/16k cache
options INCLUDE_CONFIG_FILE # Include this file in kernel
options GEOM_AES
+options GEOM_BDE
options GEOM_BSD
options GEOM_GPT
options GEOM_MBR
diff --git a/sys/conf/files b/sys/conf/files
index 2bace6d..c003bc3 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -796,6 +796,10 @@ fs/umapfs/umap_vnops.c optional umapfs
fs/unionfs/union_subr.c optional unionfs
fs/unionfs/union_vfsops.c optional unionfs
fs/unionfs/union_vnops.c optional unionfs
+geom/bde/g_bde.c optional geom_bde
+geom/bde/g_bde_crypt.c optional geom_bde
+geom/bde/g_bde_lock.c optional geom_bde
+geom/bde/g_bde_work.c optional geom_bde
geom/geom_aes.c optional geom_aes
geom/geom_bsd.c optional geom_bsd
geom/geom_ctl.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 8480b1c..0311849 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -88,6 +88,7 @@ GDB_REMOTE_CHAT opt_ddb.h
GDBSPEED opt_ddb.h
NO_GEOM opt_geom.h
GEOM_AES opt_geom.h
+GEOM_BDE opt_geom.h
GEOM_BSD opt_geom.h
GEOM_GPT opt_geom.h
GEOM_MBR opt_geom.h
diff --git a/sys/geom/bde/g_bde.c b/sys/geom/bde/g_bde.c
new file mode 100644
index 0000000..51fd779
--- /dev/null
+++ b/sys/geom/bde/g_bde.c
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+
+#define BDE_CLASS_NAME "BDE"
+
+static void
+g_bde_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_bde_softc *sc;
+
+ gp = bp->bio_to->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ sc = gp->softc;
+ switch (bp->bio_cmd) {
+ case BIO_DELETE:
+ case BIO_READ:
+ case BIO_WRITE:
+ g_bde_start1(bp);
+ break;
+ case BIO_GETATTR:
+ case BIO_SETATTR:
+ if (g_handleattr_off_t(bp, "GEOM::mediasize", sc->mediasize))
+ return;
+ if (g_handleattr_int(bp, "GEOM::sectorsize", sc->sectorsize))
+ return;
+ g_io_deliver(bp, EOPNOTSUPP);
+ break;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+ return;
+}
+
+static void
+g_bde_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct g_provider *pp;
+ struct g_bde_softc *sc;
+ int error;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_orphan(%p/%s)", cp, cp->provider->name);
+ g_topology_assert();
+ KASSERT(cp->provider->error != 0,
+ ("g_bde_orphan with error == 0"));
+
+ gp = cp->geom;
+ sc = gp->softc;
+ gp->flags |= G_GEOM_WITHER;
+ error = cp->provider->error;
+ LIST_FOREACH(pp, &gp->provider, provider)
+ g_orphan_provider(pp, error);
+ bzero(sc, sizeof(struct g_bde_softc)); /* destroy evidence */
+ return;
+}
+
+static int
+g_bde_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+
+ gp = pp->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) {
+ de++;
+ dr++;
+ }
+ /* ... and let go of it on last close */
+ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) {
+ de--;
+ dr--;
+ }
+ return (g_access_rel(cp, dr, dw, de));
+}
+
+static int
+g_bde_create(struct g_createargs *ga)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp;
+ struct g_bde_key *kp;
+ int error;
+ u_int sectorsize;
+ off_t mediasize;
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_create(%d)", ga->flag);
+ g_topology_assert();
+ if (ga->flag == 1) {
+ /*
+		 * Orderly detachment.
+ */
+ if (ga->geom != NULL) {
+ gp = ga->geom;
+ } else if (ga->provider != NULL) {
+ if (ga->provider->geom->class == ga->class) {
+ gp = ga->provider->geom;
+ } else {
+ LIST_FOREACH(cp, &ga->provider->consumers,
+ consumers) {
+ if (cp->geom->class == ga->class) {
+ gp = cp->geom;
+ break;
+ }
+ }
+ }
+ if (gp == NULL)
+ return (EINVAL);
+ } else {
+ return (EINVAL);
+ }
+ KASSERT(gp != NULL, ("NULL geom"));
+ pp = LIST_FIRST(&gp->provider);
+ KASSERT(pp != NULL, ("NULL provider"));
+ if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
+ return (EBUSY);
+ g_orphan_provider(pp, ENXIO);
+ sc = gp->softc;
+ cp = LIST_FIRST(&gp->consumer);
+ KASSERT(cp != NULL, ("NULL consumer"));
+ sc->dead = 1;
+ wakeup(sc);
+ error = g_access_rel(cp, -1, -1, -1);
+ KASSERT(error == 0, ("error on close"));
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_topology_unlock();
+ while (sc->dead != 2 && !LIST_EMPTY(&pp->consumers))
+ tsleep(sc, PRIBIO, "g_bdedie", hz);
+ g_topology_lock();
+ g_destroy_provider(pp);
+ mtx_destroy(&sc->worklist_mutex);
+ bzero(&sc->key, sizeof sc->key);
+ g_free(sc);
+ g_destroy_geom(gp);
+ return (0);
+ }
+
+ if (ga->flag != 0)
+ return (EOPNOTSUPP);
+
+ if (ga->provider == NULL)
+ return (EINVAL);
+ /*
+ * Attach
+ */
+ gp = g_new_geomf(ga->class, "%s.bde", ga->provider->name);
+ gp->start = g_bde_start;
+ gp->orphan = g_bde_orphan;
+ gp->access = g_bde_access;
+ gp->spoiled = g_std_spoiled;
+ cp = g_new_consumer(gp);
+ g_attach(cp, ga->provider);
+ error = g_access_rel(cp, 1, 1, 1);
+ if (error) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_destroy_geom(gp);
+ return (error);
+ }
+ g_topology_unlock();
+ while (1) {
+ error = g_getattr("GEOM::sectorsize", cp, &sectorsize);
+ if (error)
+ break;
+ error = g_getattr("GEOM::mediasize", cp, &mediasize);
+ if (error)
+ break;
+ sc = g_malloc(sizeof(struct g_bde_softc), M_WAITOK | M_ZERO);
+ gp->softc = sc;
+ sc->geom = gp;
+ sc->consumer = cp;
+
+ error = g_bde_decrypt_lock(sc, ga->ptr,
+ (u_char *)ga->ptr + 256, mediasize, sectorsize, NULL);
+ bzero(sc->arc4_sbox, sizeof sc->arc4_sbox);
+ if (error)
+ break;
+ kp = &sc->key;
+
+ /* Initialize helper-fields */
+ kp->keys_per_sector = kp->sectorsize / G_BDE_SKEYLEN;
+ kp->zone_cont = kp->keys_per_sector * kp->sectorsize;
+ kp->zone_width = kp->zone_cont + kp->sectorsize;
+ kp->media_width = kp->sectorN - kp->sector0 -
+ G_BDE_MAXKEYS * kp->sectorsize;
+
+ /* Our external parameters */
+ sc->zone_cont = kp->zone_cont;
+ sc->mediasize = g_bde_max_sector(kp);
+ sc->sectorsize = kp->sectorsize;
+
+ TAILQ_INIT(&sc->freelist);
+ TAILQ_INIT(&sc->worklist);
+ mtx_init(&sc->worklist_mutex, "g_bde_worklist", NULL, MTX_DEF);
+ mtx_lock(&Giant);
+ /* XXX: error check */
+ kthread_create(g_bde_worker, gp, &sc->thread, 0, 0,
+ "g_bde %s", gp->name);
+ mtx_unlock(&Giant);
+ g_topology_lock();
+ pp = g_new_providerf(gp, gp->name);
+ pp->mediasize = sc->mediasize;
+ g_error_provider(pp, 0);
+ g_topology_unlock();
+ break;
+ }
+ g_topology_lock();
+ if (error == 0) {
+ ga->geom = gp;
+ return (0);
+ } else {
+ g_access_rel(cp, -1, -1, -1);
+ }
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (gp->softc != NULL)
+ g_free(gp->softc);
+ g_destroy_geom(gp);
+ return (error);
+}
+
+static struct g_class g_bde_class = {
+ BDE_CLASS_NAME,
+ NULL,
+ g_bde_create,
+ G_CLASS_INITIALIZER
+};
+
+DECLARE_GEOM_CLASS(g_bde_class, g_bde);
diff --git a/sys/geom/bde/g_bde.h b/sys/geom/bde/g_bde.h
new file mode 100644
index 0000000..df924e4
--- /dev/null
+++ b/sys/geom/bde/g_bde.h
@@ -0,0 +1,150 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* These are quite, but not entirely unlike constants. */
+#define G_BDE_MKEYLEN (2048/8)
+#define G_BDE_SKEYBITS 128
+#define G_BDE_SKEYLEN (G_BDE_SKEYBITS/8)
+#define G_BDE_KKEYBITS 128
+#define G_BDE_KKEYLEN (G_BDE_KKEYBITS/8)
+#define G_BDE_MAXKEYS 4
+#define G_BDE_LOCKSIZE 384
+
+/* This just needs to be "large enough" */
+#define G_BDE_KEYBYTES 304
+
+struct g_bde_work;
+struct g_bde_softc;
+
+struct g_bde_sector {
+ struct g_bde_work *owner;
+ struct g_bde_softc *softc;
+ off_t offset;
+ u_int size;
+ u_int ref;
+ void *data;
+ TAILQ_ENTRY(g_bde_sector) list;
+ u_char valid;
+ u_char malloc;
+ enum {JUNK, IO, VALID} state;
+ int error;
+};
+
+struct g_bde_work {
+ struct mtx mutex;
+ off_t offset;
+ off_t length;
+ void *data;
+ struct bio *bp;
+ struct g_bde_softc *softc;
+ off_t so;
+ off_t kso;
+ u_int ko;
+ struct g_bde_sector *sp;
+ struct g_bde_sector *ksp;
+ TAILQ_ENTRY(g_bde_work) list;
+ enum {SETUP, WAIT, FINISH} state;
+ int error;
+};
+
+struct g_bde_key {
+ uint64_t sector0;
+ /* Physical byte offset of first byte used */
+ uint64_t sectorN;
+ /* Physical byte offset of first byte not used */
+ uint64_t keyoffset;
+ uint64_t lsector[G_BDE_MAXKEYS];
+ /* Physical offsets */
+ uint32_t sectorsize;
+ uint32_t flags;
+ uint8_t hash[16];
+ uint8_t spare[48];
+ uint8_t key[G_BDE_MKEYLEN];
+ /* Non-stored help-fields */
+ uint64_t zone_width; /* On-disk width of zone */
+ uint64_t zone_cont; /* Payload width of zone */
+ uint64_t media_width; /* Non-magic width of zone */
+ u_int keys_per_sector;
+};
+
+struct g_bde_softc {
+ off_t mediasize;
+ u_int sectorsize;
+ uint64_t zone_cont;
+ struct g_geom *geom;
+ struct g_consumer *consumer;
+ TAILQ_HEAD(, g_bde_sector) freelist;
+ TAILQ_HEAD(, g_bde_work) worklist;
+ struct mtx worklist_mutex;
+ struct proc *thread;
+ struct g_bde_key key;
+ u_char arc4_sbox[256];
+ u_char arc4_i, arc4_j;
+ int dead;
+ u_int nwork;
+ u_int nsect;
+ u_int ncache;
+};
+
+/* g_bde_crypt.c */
+void g_bde_crypt_delete(struct g_bde_work *wp);
+void g_bde_crypt_read(struct g_bde_work *wp);
+void g_bde_crypt_write(struct g_bde_work *wp);
+
+/* g_bde_key.c */
+void g_bde_zap_key(struct g_bde_softc *sc);
+int g_bde_get_key(struct g_bde_softc *sc, void *ptr, int len);
+int g_bde_init_keybytes(struct g_bde_softc *sc, char *passp, int len);
+
+/* g_bde_lock .c */
+void g_bde_encode_lock(struct g_bde_key *gl, u_char *ptr);
+void g_bde_decode_lock(struct g_bde_key *gl, u_char *ptr);
+u_char g_bde_arc4(struct g_bde_softc *sc);
+void g_bde_arc4_seq(struct g_bde_softc *sc, void *ptr, u_int len);
+void g_bde_arc4_seed(struct g_bde_softc *sc, void *ptr, u_int len);
+int g_bde_keyloc_encrypt(struct g_bde_softc *sc, void *input, void *output);
+int g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, void *output);
+int g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *sbox, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey);
+
+/* g_bde_math .c */
+uint64_t g_bde_max_sector(struct g_bde_key *lp);
+void g_bde_map_sector(struct g_bde_key *lp, uint64_t isector, uint64_t *osector, uint64_t *ksector, u_int *koffset);
+
+/* g_bde_work.c */
+void g_bde_start1(struct bio *bp);
+void g_bde_worker(void *arg);
+
diff --git a/sys/geom/bde/g_bde_crypt.c b/sys/geom/bde/g_bde_crypt.c
new file mode 100644
index 0000000..c649e23
--- /dev/null
+++ b/sys/geom/bde/g_bde_crypt.c
@@ -0,0 +1,356 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the functions responsible for the crypto, keying
+ * and mapping operations on the I/O requests.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/libkern.h>
+#include <sys/md5.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+#include <crypto/rijndael/rijndael.h>
+
+/*
+ * These four functions wrap the raw Rijndael functions and make sure we
+ * explode if something fails which shouldn't.
+ */
+
+static void
+AES_init(cipherInstance *ci)
+{
+ int error;
+
+ error = rijndael_cipherInit(ci, MODE_CBC, NULL);
+ KASSERT(error > 0, ("rijndael_cipherInit %d", error));
+}
+
+static void
+AES_makekey(keyInstance *ki, int dir, u_int len, void *key)
+{
+ int error;
+
+ error = rijndael_makeKey(ki, dir, len, key);
+ KASSERT(error > 0, ("rijndael_makeKey %d", error));
+}
+
+static void
+AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockEncrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockEncrypt %d", error));
+}
+
+static void
+AES_decrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockDecrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockDecrypt %d", error));
+}
+
+/*
+ * Derive kkey from mkey + sector offset.
+ *
+ * Security objective: Derive a potentially very large number of distinct skeys
+ * from the comparatively small key material in our mkey, in such a way that
+ * if one, more or even many of the kkeys are compromised, this does not
+ * significantly help an attack on other kkeys and in particular does not
+ * weaken or compromise the mkey.
+ *
+ * We do this by cherry-picking characters out of the mkey, feeding these to
+ * MD5 with the sector offset in the middle and using the MD5 hash as kkey.
+ *
+ * The MD5 only acts as a "diode" against brute-force reversal; it offers no
+ * protection if the input to MD5 is predictable or insufficiently uncorrelated
+ * from sector to sector.
+ *
+ * The amount of entropy in a sector number is very low, and the amount of
+ * entropy between two sector numbers is even lower (only slightly higher than
+ * one bit), so we rely heavily on the mkey to make the cherry picking non-
+ * linear and irreversible.
+ *
+ * This strong dependency on the mkey is very desirable, but the low amount
+ * of entropy from the sector number means that the algorithm is vulnerable
+ * to mkeys which have a lumpy histogram of byte values or little entropy.
+ *
+ * If you read this comment in order to find a weak spot or the best way to
+ * attack GBDE, you have probably come to the right place. Good luck.
+ */
+
+static void
+g_bde_kkey(struct g_bde_softc *sc, keyInstance *ki, int dir, off_t sector)
+{
+ u_int u, v, w, t;
+ MD5_CTX ct;
+ u_char buf[16], c;
+
+ MD5Init(&ct);
+ w = sector /= sc->sectorsize;
+ v = w % 211; /* A prime slightly smaller than G_BDE_MKEYLEN */
+ u = w % 19; /* A small prime */
+ for (t = 0; t < G_BDE_SKEYLEN; t++) {
+ u %= G_BDE_MKEYLEN;
+ v %= G_BDE_MKEYLEN;
+ c = sc->key.key[u] ^ sc->key.key[v];
+ MD5Update(&ct, &c, 1);
+ v += c + t;
+ u += sc->key.key[c];
+ if (w & 1)
+ v += 13; /* A small prime */
+ else
+ u += 131; /* A prime roughly G_BDE_MKEYLEN / 2 */
+ if (t == G_BDE_SKEYLEN / 2)
+ MD5Update(&ct, (void *)&sector, sizeof sector);
+ }
+ w = v = u - 0;
+ MD5Update(&ct, (void *)&sector, sizeof sector);
+ MD5Final(buf, &ct);
+ bzero(&ct, sizeof ct);
+ AES_makekey(ki, dir, G_BDE_KKEYBITS, buf);
+ bzero(buf, sizeof buf);
+}
+
+/*
+ * Encryption work for read operation.
+ *
+ * Security objective: Find the kkey, find the skey, decrypt the sector data.
+ */
+
+void
+g_bde_crypt_read(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+ u_char *d;
+ u_int n;
+ off_t o;
+ u_char skey[G_BDE_SKEYLEN];
+ keyInstance ki;
+ cipherInstance ci;
+
+
+ AES_init(&ci);
+ sc = wp->softc;
+ o = 0;
+ for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+ d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+ g_bde_kkey(sc, &ki, DIR_DECRYPT, wp->offset + o);
+ AES_decrypt(&ci, &ki, d, skey, sizeof skey);
+ d = (u_char *)wp->data + o;
+ AES_makekey(&ki, DIR_DECRYPT, G_BDE_SKEYBITS, skey);
+ AES_decrypt(&ci, &ki, d, d, sc->sectorsize);
+ }
+ bzero(skey, sizeof skey);
+ bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);
+}
+
+/*
+ * Encryption work for write operation.
+ *
+ * Security objective: Create random skey, encrypt sector data,
+ * encrypt skey with the kkey.
+ */
+
+void
+g_bde_crypt_write(struct g_bde_work *wp)
+{
+ u_char *s, *d;
+ struct g_bde_softc *sc;
+ u_int n;
+ off_t o;
+ u_char skey[G_BDE_SKEYLEN];
+ keyInstance ki;
+ cipherInstance ci;
+
+ sc = wp->softc;
+ AES_init(&ci);
+ o = 0;
+ for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+
+ s = (u_char *)wp->data + o;
+ d = (u_char *)wp->sp->data + o;
+ arc4rand(&skey, sizeof skey, 0);
+ AES_makekey(&ki, DIR_ENCRYPT, G_BDE_SKEYBITS, skey);
+ AES_encrypt(&ci, &ki, s, d, sc->sectorsize);
+
+ d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+ g_bde_kkey(sc, &ki, DIR_ENCRYPT, wp->offset + o);
+ AES_encrypt(&ci, &ki, skey, d, sizeof skey);
+ bzero(skey, sizeof skey);
+ }
+ bzero(skey, sizeof skey);
+ bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);
+}
+
+/*
+ * Encryption work for delete operation.
+ *
+ * Security objective: Write random data to the sectors.
+ *
+ * XXX: At a cost in performance we could trash the encrypted skey as well.
+ * XXX: This would add frustration to the cleaning lady attack by making
+ * XXX: deletes look like writes.
+ */
+
+void
+g_bde_crypt_delete(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+ u_char *d;
+ off_t o;
+
+ sc = wp->softc;
+ d = wp->sp->data;
+ /*
+ * Do not unroll this loop!
+ * Our zone may be significantly wider than the amount of random
+ * bytes arc4rand likes to give in one reseeding, whereas our
+ * sectorsize is far more likely to be in the same range.
+ */
+ for (o = 0; o < wp->length; o += sc->sectorsize) {
+ arc4rand(d, sc->sectorsize, 0);
+ d += sc->sectorsize;
+ }
+ /*
+ * Having written a long random sequence to disk here, we want to
+ * force a reseed, to avoid weakening the next time we use random
+ * data for something important.
+ */
+ arc4rand(&o, sizeof o, 1);
+}
+
+/*
+ * Calculate the total payload size of the encrypted device.
+ *
+ * Security objectives: none.
+ *
+ * This function needs to agree with g_bde_map_sector() about things.
+ */
+
+uint64_t
+g_bde_max_sector(struct g_bde_key *kp)
+{
+ uint64_t maxsect;
+
+ maxsect = kp->media_width;
+ maxsect /= kp->zone_width;
+ maxsect *= kp->zone_cont;
+ return (maxsect);
+}
+
+/*
+ * Convert an unencrypted side offset to offsets on the encrypted side.
+ *
+ * Security objective: Make it harder to identify what sectors contain what
+ * on a "cold" disk image.
+ *
+ * We do this by adding the "keyoffset" from the lock to the physical sector
+ * number, modulo the available number of sectors.  Since all physical sectors
+ * presumably look the same when cold, this should be enough.
+ *
+ * Shuffling things further is an option, but the incremental frustration is
+ * not currently deemed worth the run-time performance hit resulting from the
+ * increased number of disk arm movements it would incur.
+ *
+ * This function offers nothing but a trivial diversion for an attacker able
+ * to do "the cleaning lady attack" in its current static mapping form.
+ */
+
+void
+g_bde_map_sector(struct g_bde_key *kp,
+ uint64_t isector,
+ uint64_t *osector,
+ uint64_t *ksector,
+ u_int *koffset)
+{
+
+ u_int zone, zoff, zidx, u;
+ uint64_t os;
+
+ /* find which zone and the offset and index in it */
+ zone = isector / kp->zone_cont;
+ zoff = isector % kp->zone_cont;
+ zidx = zoff / kp->sectorsize;
+
+ /* Find physical sector address */
+ os = zone * kp->zone_width + zoff;
+ os += kp->keyoffset;
+ os %= kp->media_width - (G_BDE_MAXKEYS * kp->sectorsize);
+ os += kp->sector0;
+
+ /* Compensate for lock sectors */
+ for (u = 0; u < G_BDE_MAXKEYS; u++)
+ if (os >= kp->lsector[u])
+ os += kp->sectorsize;
+
+ *osector = os;
+
+ /* The key sector is the last in this zone. */
+ os = (1 + zone) * kp->zone_width - kp->sectorsize;
+ os += kp->keyoffset;
+ os %= kp->media_width - (G_BDE_MAXKEYS * kp->sectorsize);
+ os += kp->sector0;
+
+ for (u = 0; u < G_BDE_MAXKEYS; u++)
+ if (os >= kp->lsector[u])
+ os += kp->sectorsize;
+ *ksector = os;
+
+ *koffset = zidx * G_BDE_SKEYLEN;
+
+#if 0
+ printf("off %jd %jd %jd %u\n",
+ (intmax_t)isector,
+ (intmax_t)*osector,
+ (intmax_t)*ksector,
+ *koffset);
+#endif
+}
diff --git a/sys/geom/bde/g_bde_lock.c b/sys/geom/bde/g_bde_lock.c
new file mode 100644
index 0000000..e58683f
--- /dev/null
+++ b/sys/geom/bde/g_bde_lock.c
@@ -0,0 +1,311 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains routines which operate on the lock sectors, both
+ * for the kernel and the userland program gbde(1).
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/stdint.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/md5.h>
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#define g_free(foo) free(foo)
+#endif
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+#include <crypto/rijndael/rijndael.h>
+
+/*
+ * Encode/Decode the lock structure in byte-sequence format.
+ *
+ * Security objectives: none.
+ *
+ * C-structure packing and byte-endianness depend on architecture, compiler
+ * and compiler options. We therefore explicitly encode and decode struct
+ * g_bde_key using an invariant byte-sequence format.
+ *
+ */
+
+void
+g_bde_encode_lock(struct g_bde_key *gl, u_char *ptr)
+{
+
+ bcopy(gl->hash, ptr + 0, sizeof gl->hash);
+ g_enc_le8(ptr + 16, gl->sector0);
+ g_enc_le8(ptr + 24, gl->sectorN);
+ g_enc_le8(ptr + 32, gl->keyoffset);
+ g_enc_le4(ptr + 40, gl->sectorsize);
+ g_enc_le4(ptr + 44, gl->flags);
+ g_enc_le8(ptr + 48, gl->lsector[0]);
+ g_enc_le8(ptr + 56, gl->lsector[1]);
+ g_enc_le8(ptr + 64, gl->lsector[2]);
+ g_enc_le8(ptr + 72, gl->lsector[3]);
+ bcopy(gl->spare, ptr + 80, sizeof gl->spare);
+ bcopy(gl->key, ptr + 128, sizeof gl->key);
+}
+
+void
+g_bde_decode_lock(struct g_bde_key *gl, u_char *ptr)
+{
+ bcopy(ptr + 0, gl->hash, sizeof gl->hash);
+ gl->sector0 = g_dec_le8(ptr + 16);
+ gl->sectorN = g_dec_le8(ptr + 24);
+ gl->keyoffset = g_dec_le8(ptr + 32);
+ gl->sectorsize = g_dec_le4(ptr + 40);
+ gl->flags = g_dec_le4(ptr + 44);
+ gl->lsector[0] = g_dec_le8(ptr + 48);
+ gl->lsector[1] = g_dec_le8(ptr + 56);
+ gl->lsector[2] = g_dec_le8(ptr + 64);
+ gl->lsector[3] = g_dec_le8(ptr + 72);
+ bcopy(ptr + 80, gl->spare, sizeof gl->spare);
+ bcopy(ptr + 128, gl->key, sizeof gl->key);
+}
+
+/*
+ * Generate key-material used for protecting lock sectors.
+ *
+ * Security objectives: from the pass-phrase provided by the user, produce a
+ * reproducible stream of bits/bytes which resemble pseudo-random bits.
+ *
+ * This is the stream-cipher algorithm called ARC4. See for instance the
+ * description in "Applied Cryptography" by Bruce Schneier.
+ */
+
+u_char
+g_bde_arc4(struct g_bde_softc *sc)
+{
+ u_char c;
+
+ sc->arc4_j += sc->arc4_sbox[++sc->arc4_i];
+ c = sc->arc4_sbox[sc->arc4_i];
+ sc->arc4_sbox[sc->arc4_i] = sc->arc4_sbox[sc->arc4_j];
+ sc->arc4_sbox[sc->arc4_j] = c;
+ c = sc->arc4_sbox[sc->arc4_i] + sc->arc4_sbox[sc->arc4_j];
+ c = sc->arc4_sbox[c];
+ return (c);
+}
+
+void
+g_bde_arc4_seq(struct g_bde_softc *sc, void *ptr, u_int len)
+{
+ u_char *p;
+
+ p = ptr;
+ while (len--)
+ *p++ = g_bde_arc4(sc);
+}
+
+void
+g_bde_arc4_seed(struct g_bde_softc *sc, void *ptr, u_int len)
+{
+ u_char k[256], *p, c;
+ u_int i;
+
+ p = ptr;
+ sc->arc4_i = 0;
+ bzero(k, sizeof k);
+ while(len--)
+ k[sc->arc4_i++] ^= *p++;
+
+ sc->arc4_j = 0;
+ for (i = 0; i < 256; i++)
+ sc->arc4_sbox[i] = i;
+ for (i = 0; i < 256; i++) {
+ sc->arc4_j += sc->arc4_sbox[i] + k[i];
+ c = sc->arc4_sbox[i];
+ sc->arc4_sbox[i] = sc->arc4_sbox[sc->arc4_j];
+ sc->arc4_sbox[sc->arc4_j] = c;
+ }
+ sc->arc4_i = 0;
+ sc->arc4_j = 0;
+}
+
+/*
+ * Encrypt/Decrypt the metadata address with key-material.
+ */
+
+int
+g_bde_keyloc_encrypt(struct g_bde_softc *sc, void *input, void *output)
+{
+ u_char *p;
+ u_char buf[16], buf1[16];
+ u_int i;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ p = input;
+ g_bde_arc4_seq(sc, buf, sizeof buf);
+ for (i = 0; i < sizeof buf; i++)
+ buf1[i] = p[i] ^ buf[i];
+ g_bde_arc4_seq(sc, buf, sizeof buf);
+ rijndael_makeKey(&ki, DIR_ENCRYPT, G_BDE_KKEYBITS, buf);
+ rijndael_blockEncrypt(&ci, &ki, buf1, 16 * 8, output);
+ bzero(&ci, sizeof ci);
+ return (0);
+}
+
+int
+g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, void *output)
+{
+ u_char *p;
+ u_char buf1[16], buf2[16];
+ u_int i;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ g_bde_arc4_seq(sc, buf1, sizeof buf1);
+ g_bde_arc4_seq(sc, buf2, sizeof buf2);
+ rijndael_makeKey(&ki, DIR_DECRYPT, G_BDE_KKEYBITS, buf2);
+ rijndael_blockDecrypt(&ci, &ki, input, 16 * 8, output);
+ p = output;
+ for (i = 0; i < sizeof buf1; i++)
+ p[i] ^= buf1[i];
+ bzero(&ci, sizeof ci);
+ return (0);
+}
+
+/*
+ * Encode/Decode lock sectors.
+ */
+
+int
+g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *sbox, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey)
+{
+ u_char *buf, k1buf[16], k2buf[G_BDE_LOCKSIZE], k3buf[16], *q;
+ struct g_bde_key *gl;
+ uint64_t off[2];
+ int error, m, i;
+ MD5_CTX c;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ bcopy(sbox, sc->arc4_sbox, 256);
+ sc->arc4_i = 0;
+ sc->arc4_j = 0;
+ gl = &sc->key;
+ error = g_bde_keyloc_decrypt(sc, meta, off);
+ if (error)
+ return(error);
+
+ if (off[0] + G_BDE_LOCKSIZE > (uint64_t)mediasize) {
+ bzero(off, sizeof off);
+ return (ESRCH);
+ }
+ off[1] = 0;
+ m = 1;
+ if (off[0] % sectorsize > sectorsize - G_BDE_LOCKSIZE)
+ m++;
+ buf = g_read_data(sc->consumer,
+ off[0] - (off[0] % sectorsize),
+ m * sectorsize, &error);
+ if (buf == NULL) {
+ off[0] = 0;
+ return(error);
+ }
+
+ q = buf + off[0] % sectorsize;
+
+ off[1] = 0;
+ for (i = 0; i < (int)sizeof(*gl); i++)
+ off[1] += q[i];
+
+ if (off[1] == 0) {
+ off[0] = 0;
+ g_free(buf);
+ return (ESRCH);
+ }
+
+ g_bde_arc4_seq(sc, k1buf, sizeof k1buf);
+ g_bde_arc4_seq(sc, k2buf, sizeof k2buf);
+ g_bde_arc4_seq(sc, k3buf, sizeof k3buf);
+
+ MD5Init(&c);
+ MD5Update(&c, "0000", 4); /* XXX: for future versioning */
+ MD5Update(&c, k1buf, 16);
+ MD5Final(k1buf, &c);
+
+ rijndael_makeKey(&ki, DIR_DECRYPT, 128, k3buf);
+ bzero(k3buf, sizeof k3buf);
+ rijndael_blockDecrypt(&ci, &ki, q, G_BDE_LOCKSIZE * 8, q);
+
+ for (i = 0; i < G_BDE_LOCKSIZE; i++)
+ q[i] ^= k2buf[i];
+ bzero(k2buf, sizeof k2buf);
+
+ if (bcmp(q, k1buf, sizeof k1buf)) {
+ bzero(k1buf, sizeof k1buf);
+ bzero(buf, sectorsize * m);
+ g_free(buf);
+ off[0] = 0;
+ return (ESRCH);
+ }
+ bzero(k1buf, sizeof k1buf);
+
+ g_bde_decode_lock(gl, q);
+ bzero(buf, sectorsize * m);
+ g_free(buf);
+
+ off[1] = 0;
+ for (i = 0; i < (int)sizeof(gl->key); i++)
+ off[1] += gl->key[i];
+
+ if (off[1] == 0) {
+ off[0] = 0;
+ return (ENOENT);
+ }
+ for (i = 0; i < G_BDE_MAXKEYS; i++)
+ if (nkey != NULL && off[0] == gl->lsector[i])
+ *nkey = i;
+
+ return (0);
+}
diff --git a/sys/geom/bde/g_bde_work.c b/sys/geom/bde/g_bde_work.c
new file mode 100644
index 0000000..6f337fa
--- /dev/null
+++ b/sys/geom/bde/g_bde_work.c
@@ -0,0 +1,731 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the state-engine which makes things happen in the
+ * right order.
+ *
+ * Outline:
+ * 1) g_bde_start1()
+ * Break the struct bio into multiple work packets one per zone.
+ * 2) g_bde_start2()
+ * Setup the necessary sector buffers and start those read operations
+ * which we can start at this time and put the item on the work-list.
+ * 3) g_bde_worker()
+ * Scan the work-list for items which are ready for crypto processing
+ * and call the matching crypto function in g_bde_crypt.c and schedule
+ * any writes needed. Read operations finish here by releasing the
+ * sector buffers and delivering the original bio request.
+ * 4) g_bde_write_done()
+ * Release sector buffers and deliver the original bio request.
+ *
+ * Because of the C-scope rules, the functions are almost perfectly in the
+ * opposite order in this source file.
+ *
+ * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
+ * XXX: additional states to this state-engine. Since no hardware available
+ * XXX: at this time has AES support, implementing this has been postponed
+ * XXX: until such time as it would result in a benefit.
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
+static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
+static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
+static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
+static int g_bde_start_read(struct g_bde_sector *sp);
+
+/*
+ * Work item allocation.
+ *
+ * C++ would call these constructors and destructors.
+ */
+static u_int g_bde_nwork;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
+
+static struct g_bde_work *
+g_bde_new_work(struct g_bde_softc *sc)
+{
+ struct g_bde_work *wp;
+
+ wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
+ if (wp == NULL)
+ return (wp);
+ wp->state = SETUP;
+ wp->softc = sc;
+ g_bde_nwork++;
+ sc->nwork++;
+ TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
+ return (wp);
+}
+
+static void
+g_bde_delete_work(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+
+ sc = wp->softc;
+ g_bde_nwork--;
+ sc->nwork--;
+ TAILQ_REMOVE(&sc->worklist, wp, list);
+ g_free(wp);
+}
+
+/*
+ * Sector buffer allocation
+ *
+ * These two functions allocate and free back variable sized sector buffers
+ */
+
+static u_int g_bde_nsect;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
+
+void
+g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
+{
+
+ g_bde_nsect--;
+ sc->nsect--;
+ if (sp->malloc)
+ g_free(sp->data);
+ g_free(sp);
+}
+
+struct g_bde_sector *
+g_bde_new_sector(struct g_bde_work *wp, u_int len)
+{
+ struct g_bde_sector *sp;
+
+ sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
+ if (sp == NULL)
+ return (sp);
+ if (len > 0) {
+ sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
+ if (sp->data == NULL) {
+ g_free(sp);
+ return (NULL);
+ }
+ sp->malloc = 1;
+ }
+ g_bde_nsect++;
+ wp->softc->nsect++;
+ sp->size = len;
+ sp->softc = wp->softc;
+ sp->ref = 1;
+ sp->owner = wp;
+ sp->offset = wp->so;
+ sp->state = JUNK;
+ return (sp);
+}
+
+/*
+ * Skey sector cache.
+ *
+ * Nothing prevents two separate I/O requests from addressing the same zone
+ * and thereby needing the same skey sector. We therefore need to sequence
+ * I/O operations to the skey sectors. A certain amount of caching is also
+ * desirable, although the extent of benefit from this is not at this point
+ * determined.
+ *
+ * XXX: GEOM may be able to grow a generic caching facility at some point
+ * XXX: to support such needs.
+ */
+
+static u_int g_bde_ncache;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
+
+static struct g_bde_sector *
+g_bde_get_sector(struct g_bde_work *wp, off_t offset)
+{
+ struct g_bde_sector *sp;
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
+ sc = wp->softc;
+ TAILQ_FOREACH(sp, &sc->freelist, list) {
+ if (sp->offset == offset)
+ break;
+ }
+ if (sp != NULL) {
+ sp->ref++;
+ KASSERT(sp->offset == offset, ("wrong offset"));
+ KASSERT(sp->softc == wp->softc, ("wrong softc"));
+ if (sp->ref == 1)
+ sp->owner = wp;
+ } else {
+ if (!TAILQ_EMPTY(&sc->freelist))
+ sp = TAILQ_FIRST(&sc->freelist);
+ if (sp != NULL && sp->ref > 0)
+ sp = NULL;
+ if (sp == NULL) {
+ g_bde_ncache++;
+ sc->ncache++;
+ sp = g_bde_new_sector(wp, sc->sectorsize);
+ if (sp != NULL) {
+ TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+ sp->malloc = 2;
+ }
+ }
+ if (sp != NULL) {
+ sp->offset = offset;
+ sp->softc = wp->softc;
+ sp->ref = 1;
+ sp->owner = wp;
+ sp->state = JUNK;
+ sp->error = 0;
+ }
+ }
+ if (sp != NULL) {
+ TAILQ_REMOVE(&sc->freelist, sp, list);
+ TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+ }
+ wp->ksp = sp;
+ KASSERT(sp != NULL, ("get_sector failed"));
+ return(sp);
+}
+
+static void
+g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
+{
+ struct g_bde_softc *sc;
+ struct g_bde_work *wp2;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
+ KASSERT(sp->malloc == 2, ("Wrong sector released"));
+ sc = sp->softc;
+ KASSERT(sc != NULL, ("NULL sp->softc"));
+ KASSERT(wp == sp->owner, ("Releasing, not owner"));
+ sp->owner = NULL;
+ wp->ksp = NULL;
+ sp->ref--;
+ if (sp->ref > 0) {
+ TAILQ_REMOVE(&sc->freelist, sp, list);
+ TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+ TAILQ_FOREACH(wp2, &sc->worklist, list) {
+ if (wp2->ksp == sp) {
+ KASSERT(wp2 != wp, ("Self-reowning"));
+ sp->owner = wp2;
+ wakeup(sp->softc);
+ break;
+ }
+ }
+ KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
+ } else if (sp->error != 0) {
+ sp->offset = ~0;
+ sp->error = 0;
+ sp->state = JUNK;
+ }
+ TAILQ_REMOVE(&sc->freelist, sp, list);
+ TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
+}
+
+static void
+g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
+{
+ struct g_bde_sector *sp;
+ int n;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
+ n = sc->ncache / fraction + 1;
+ while(n--) {
+ TAILQ_FOREACH(sp, &sc->freelist, list) {
+ if (sp->ref != 0)
+ continue;
+ TAILQ_REMOVE(&sc->freelist, sp, list);
+ g_bde_ncache--;
+ sc->ncache--;
+ bzero(sp->data, sp->size);
+ g_bde_delete_sector(sc, sp);
+ break;
+ }
+ }
+}
+
+static struct g_bde_sector *
+g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
+{
+ struct g_bde_sector *sp;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
+ sp = g_bde_get_sector(wp, offset);
+ if (sp == NULL)
+ return (sp);
+ if (sp->owner != wp)
+ return (sp);
+ if (sp->state == VALID)
+ return (sp);
+ if (g_bde_start_read(sp) == 0)
+ return (sp);
+ g_bde_release_sector(wp, sp);
+ return (NULL);
+}
+
+/*
+ * Contribute to the completion of the original bio request.
+ *
+ * We have no simple way to tell how many bits the original bio request has
+ * been segmented into, so the easiest way to determine when we can deliver
+ * it is to keep track of the number of bytes we have completed. We keep
+ * track of any errors underway and latch onto the first one.
+ *
+ * We always report "nothing done" in case of error, because random bits here
+ * and there may be completed and returning a number of completed bytes does
+ * not convey any useful information about which bytes they were. If some
+ * piece of broken code somewhere interprets this to mean that nothing has
+ * changed on the underlying media, they deserve the lossage headed for them.
+ *
+ * A single mutex per g_bde instance is used to prevent contention.
+ */
+
+static void
+g_bde_contribute(struct bio *bp, off_t bytes, int error)
+{
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
+ bp, (intmax_t)bytes, error);
+ sc = bp->bio_driver1;
+ if (bp->bio_error == 0)
+ bp->bio_error = error;
+ bp->bio_completed += bytes;
+ KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
+ if (bp->bio_completed == bp->bio_length) {
+ if (bp->bio_error != 0)
+ bp->bio_completed = 0;
+ g_io_deliver(bp, bp->bio_error);
+ }
+}
+
+/*
+ * A write operation has finished. When we have all expected cows in the
+ * barn, close the door and call it a day.
+ */
+
+static void
+g_bde_write_done(struct bio *bp)
+{
+ struct g_bde_sector *sp;
+ struct g_bde_work *wp;
+ struct g_bde_softc *sc;
+
+ sp = bp->bio_caller1;
+ sc = bp->bio_caller2;
+ mtx_lock(&sc->worklist_mutex);
+ KASSERT(sp != NULL, ("NULL sp"));
+ KASSERT(sc != NULL, ("NULL sc"));
+ KASSERT(sp->owner != NULL, ("NULL sp->owner"));
+ g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
+ sp->error = bp->bio_error;
+ g_destroy_bio(bp);
+ wp = sp->owner;
+ if (wp->error == 0)
+ wp->error = sp->error;
+
+ if (wp->bp->bio_cmd == BIO_DELETE) {
+ KASSERT(sp == wp->sp, ("trashed delete op"));
+ g_bde_contribute(wp->bp, wp->length, wp->error);
+ g_bde_delete_sector(sc, sp);
+ g_bde_delete_work(wp);
+ mtx_unlock(&sc->worklist_mutex);
+ return;
+ }
+
+ KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
+ KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
+ if (wp->sp == sp) {
+ g_bde_delete_sector(sc, wp->sp);
+ wp->sp = NULL;
+ } else {
+ sp->state = VALID;
+ }
+ if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
+ g_bde_contribute(wp->bp, wp->length, wp->error);
+ g_bde_release_sector(wp, wp->ksp);
+ g_bde_delete_work(wp);
+ }
+ mtx_unlock(&sc->worklist_mutex);
+ return;
+}
+
+/*
+ * Send a write request for the given sector down the pipeline.
+ */
+
+static int
+g_bde_start_write(struct g_bde_sector *sp)
+{
+ struct bio *bp;
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
+ sc = sp->softc;
+ KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
+ KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_WRITE;
+ bp->bio_offset = sp->offset;
+ bp->bio_data = sp->data;
+ bp->bio_length = sp->size;
+ bp->bio_done = g_bde_write_done;
+ bp->bio_caller1 = sp;
+ bp->bio_caller2 = sc;
+ sp->state = IO;
+ g_io_request(bp, sc->consumer);
+ return(0);
+}
+
+/*
+ * A read operation has finished. Mark the sector no longer iobusy and
+ * wake up the worker thread and let it do its thing.
+ */
+
+static void
+g_bde_read_done(struct bio *bp)
+{
+ struct g_bde_sector *sp;
+ struct g_bde_softc *sc;
+
+ sp = bp->bio_caller1;
+ g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
+ sc = bp->bio_caller2;
+ mtx_lock(&sc->worklist_mutex);
+ sp->error = bp->bio_error;
+ sp->state = VALID;
+ wakeup(sc);
+ g_destroy_bio(bp);
+ mtx_unlock(&sc->worklist_mutex);
+}
+
+/*
+ * Send a read request for the given sector down the pipeline.
+ */
+
+static int
+g_bde_start_read(struct g_bde_sector *sp)
+{
+ struct bio *bp;
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
+ sc = sp->softc;
+ KASSERT(sc != NULL, ("Null softc in sp %p", sp));
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_READ;
+ bp->bio_offset = sp->offset;
+ bp->bio_data = sp->data;
+ bp->bio_length = sp->size;
+ bp->bio_done = g_bde_read_done;
+ bp->bio_caller1 = sp;
+ bp->bio_caller2 = sc;
+ sp->state = IO;
+ g_io_request(bp, sc->consumer);
+ return(0);
+}
+
+/*
+ * The worker thread.
+ *
+ * The up/down path of GEOM is not allowed to sleep or do any major work
+ * so we use this thread to do the actual crypto operations and to push
+ * the state engine onwards.
+ *
+ * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption,
+ * XXX: using a thread here is probably not needed.
+ */
+
+void
+g_bde_worker(void *arg)
+{
+ struct g_bde_softc *sc;
+ struct g_bde_work *wp;
+ struct g_geom *gp;
+ int busy, error;
+
+ gp = arg;
+ sc = gp->softc;
+
+ mtx_lock(&sc->worklist_mutex);
+ for (;;) {
+ busy = 0;
+ g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
+ TAILQ_FOREACH(wp, &sc->worklist, list) {
+ KASSERT(wp != NULL, ("NULL wp"));
+ KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+ if (wp->state != WAIT)
+ continue; /* Not interesting here */
+
+ KASSERT(wp->bp != NULL, ("NULL wp->bp"));
+ KASSERT(wp->sp != NULL, ("NULL wp->sp"));
+
+ if (wp->ksp != NULL) {
+ if (wp->ksp->owner != wp)
+ continue;
+ if (wp->ksp->state == IO)
+ continue;
+ KASSERT(wp->ksp->state == VALID,
+ ("Illegal sector state (JUNK ?)"));
+ }
+
+ if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
+ continue;
+
+ if (wp->ksp != NULL && wp->ksp->error != 0) {
+ g_bde_contribute(wp->bp, wp->length,
+ wp->ksp->error);
+ g_bde_delete_sector(sc, wp->sp);
+ g_bde_release_sector(wp, wp->ksp);
+ g_bde_delete_work(wp);
+ busy++;
+ break;
+ }
+ switch(wp->bp->bio_cmd) {
+ case BIO_READ:
+ if (wp->ksp != NULL && wp->sp->error == 0) {
+ mtx_unlock(&sc->worklist_mutex);
+ g_bde_crypt_read(wp);
+ mtx_lock(&sc->worklist_mutex);
+ }
+ g_bde_contribute(wp->bp, wp->length,
+ wp->sp->error);
+ g_bde_delete_sector(sc, wp->sp);
+ if (wp->ksp != NULL)
+ g_bde_release_sector(wp, wp->ksp);
+ g_bde_delete_work(wp);
+ break;
+ case BIO_WRITE:
+ wp->state = FINISH;
+ KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
+ KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
+ mtx_unlock(&sc->worklist_mutex);
+ g_bde_crypt_write(wp);
+ mtx_lock(&sc->worklist_mutex);
+ g_bde_start_write(wp->sp);
+ g_bde_start_write(wp->ksp);
+ break;
+ case BIO_DELETE:
+ wp->state = FINISH;
+ mtx_unlock(&sc->worklist_mutex);
+ g_bde_crypt_delete(wp);
+ mtx_lock(&sc->worklist_mutex);
+ g_bde_start_write(wp->sp);
+ break;
+ }
+ busy++;
+ break;
+ }
+ if (!busy) {
+ /*
+ * We don't look for our death-warrant until we are
+ * idle. Shouldn't make a difference in practice.
+ */
+ if (sc->dead)
+ break;
+ g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
+ error = msleep(sc, &sc->worklist_mutex,
+ PRIBIO, "g_bde", hz);
+ if (error == EWOULDBLOCK) {
+ /*
+				 * Lose our skey cache in an orderly fashion.
+ * The exact rate can be tuned to be less
+ * aggressive if this is desirable. 10% per
+ * second means that the cache is gone in a
+ * few minutes.
+ */
+ g_bde_purge_sector(sc, 10);
+ }
+ }
+ }
+ g_trace(G_T_TOPOLOGY, "g_bde_worker die");
+ g_bde_purge_sector(sc, 1);
+ KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
+ KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
+ KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
+ mtx_unlock(&sc->worklist_mutex);
+ sc->dead = 2;
+ wakeup(sc);
+ mtx_lock(&Giant);
+ kthread_exit(0);
+}
+
+/*
+ * g_bde_start1 has chopped the incoming request up so all the requests
+ * we see here are inside a single zone. Map the data and key locations,
+ * grab the buffers we need, and fire off the first volley of read requests.
+ */
+
+static void
+g_bde_start2(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+
+ KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
+ g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
+ sc = wp->softc;
+ KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+ g_bde_map_sector(&sc->key, wp->offset, &wp->so, &wp->kso, &wp->ko);
+ if (wp->bp->bio_cmd == BIO_READ) {
+ wp->sp = g_bde_new_sector(wp, 0);
+ if (wp->sp == NULL) {
+ g_bde_contribute(wp->bp, wp->length, ENOMEM);
+ g_bde_delete_work(wp);
+ return;
+ }
+ wp->sp->size = wp->length;
+ wp->sp->data = wp->data;
+ if (g_bde_start_read(wp->sp) != 0) {
+ g_bde_contribute(wp->bp, wp->length, ENOMEM);
+ g_bde_delete_sector(sc, wp->sp);
+ g_bde_delete_work(wp);
+ return;
+ }
+ g_bde_read_sector(sc, wp, wp->kso);
+ if (wp->ksp == NULL)
+ wp->error = ENOMEM;
+ } else if (wp->bp->bio_cmd == BIO_DELETE) {
+ wp->sp = g_bde_new_sector(wp, wp->length);
+ if (wp->sp == NULL) {
+ g_bde_contribute(wp->bp, wp->length, ENOMEM);
+ g_bde_delete_work(wp);
+ return;
+ }
+ } else if (wp->bp->bio_cmd == BIO_WRITE) {
+ wp->sp = g_bde_new_sector(wp, wp->length);
+ if (wp->sp == NULL) {
+ g_bde_contribute(wp->bp, wp->length, ENOMEM);
+ g_bde_delete_work(wp);
+ return;
+ }
+ g_bde_read_sector(sc, wp, wp->kso);
+ if (wp->ksp == NULL) {
+ g_bde_contribute(wp->bp, wp->length, ENOMEM);
+ g_bde_delete_sector(sc, wp->sp);
+ g_bde_delete_work(wp);
+ return;
+ }
+ } else {
+ KASSERT(0 == 1,
+ ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
+ }
+
+ wp->state = WAIT;
+ wakeup(sc);
+}
+
+/*
+ * Split the incoming bio on zone boundaries and submit the resulting
+ * work structures to g_bde_start2().
+ */
+
+void
+g_bde_start1(struct bio *bp)
+{
+ struct g_bde_softc *sc;
+ struct g_bde_work *wp;
+ off_t zone_start, left;
+ caddr_t p;
+
+ sc = bp->bio_to->geom->softc;
+ bp->bio_driver1 = sc;
+
+ mtx_lock(&sc->worklist_mutex);
+ zone_start = bp->bio_offset - bp->bio_offset % sc->zone_cont;
+ wp = g_bde_new_work(sc);
+ if (wp == NULL) {
+ g_io_deliver(bp, ENOMEM);
+ mtx_unlock(&sc->worklist_mutex);
+ return;
+ }
+ left = bp->bio_length;
+ p = bp->bio_data;
+
+	/* Do the first and possibly only fragment */
+ wp->bp = bp;
+ wp->offset = bp->bio_offset;
+ wp->data = p;
+ wp->length = zone_start + sc->zone_cont - wp->offset;
+ if (wp->length >= left) {
+ /* Only this one fragment needed */
+ wp->length = left;
+ g_bde_start2(wp);
+ mtx_unlock(&sc->worklist_mutex);
+ return;
+ }
+
+ /* Submit the first fragment */
+ g_bde_start2(wp);
+ left -= wp->length;
+ p += wp->length;
+
+ /* Do the subsequent fragments */
+ for(;left > 0;) {
+ wp = g_bde_new_work(sc);
+ if (wp == NULL) {
+ g_bde_contribute(bp, left, ENOMEM);
+ mtx_unlock(&sc->worklist_mutex);
+ return;
+ }
+ zone_start += sc->zone_cont;
+ wp->bp = bp;
+ wp->offset = zone_start;
+ wp->data = p;
+ if (left > sc->zone_cont)
+ wp->length = sc->zone_cont;
+ else
+ wp->length = left;
+ left -= wp->length;
+ p += wp->length;
+ g_bde_start2(wp);
+ }
+ mtx_unlock(&sc->worklist_mutex);
+}